using System;

namespace HashLib.Crypto.SHA3
{
    /// <summary>
    /// Concrete Groestl hash that configures <see cref="Groestl256Base"/> with
    /// <c>HashLib.HashSize.HashSize224</c>. The class declares no members of its
    /// own besides the constructor; everything else is inherited from the base.
    /// </summary>
    internal class Groestl224 : Groestl256Base
    {
        /// <summary>
        /// Creates the hash by forwarding <c>HashSize224</c> to the base constructor.
        /// </summary>
        // The HashSize type is namespace-qualified on purpose: keep the qualifier
        // so the name cannot be captured by an inherited member of the same name.
        public Groestl224() : base(HashLib.HashSize.HashSize224) { }
    }

    /// <summary>
    /// Concrete Groestl hash that configures <see cref="Groestl256Base"/> with
    /// <c>HashLib.HashSize.HashSize256</c>. The class declares no members of its
    /// own besides the constructor; everything else is inherited from the base.
    /// </summary>
    internal class Groestl256 : Groestl256Base
    {
        /// <summary>
        /// Creates the hash by forwarding <c>HashSize256</c> to the base constructor.
        /// </summary>
        // The HashSize type is namespace-qualified on purpose: keep the qualifier
        // so the name cannot be captured by an inherited member of the same name.
        public Groestl256() : base(HashLib.HashSize.HashSize256) { }
    }

    /// <summary>
    /// Concrete Groestl hash that configures <see cref="Groestl512Base"/> with
    /// <c>HashLib.HashSize.HashSize384</c>. The class declares no members of its
    /// own besides the constructor; everything else is inherited from the base.
    /// </summary>
    internal class Groestl384 : Groestl512Base
    {
        /// <summary>
        /// Creates the hash by forwarding <c>HashSize384</c> to the base constructor.
        /// </summary>
        // The HashSize type is namespace-qualified on purpose: keep the qualifier
        // so the name cannot be captured by an inherited member of the same name.
        public Groestl384() : base(HashLib.HashSize.HashSize384) { }
    }

    /// <summary>
    /// Concrete Groestl hash that configures <see cref="Groestl512Base"/> with
    /// <c>HashLib.HashSize.HashSize512</c>. The class declares no members of its
    /// own besides the constructor; everything else is inherited from the base.
    /// </summary>
    internal class Groestl512 : Groestl512Base
    {
        /// <summary>
        /// Creates the hash by forwarding <c>HashSize512</c> to the base constructor.
        /// </summary>
        // The HashSize type is namespace-qualified on purpose: keep the qualifier
        // so the name cannot be captured by an inherited member of the same name.
        public Groestl512() : base(HashLib.HashSize.HashSize512) { }
    }

    internal abstract class GroestlBase : BlockHash, ICryptoNotBuildIn
    {
        #region Consts
        protected static readonly ulong[] s_T = 
        {
            0xC6A597F4A5F432C6, 0xF884EB9784976FF8, 0xEE99C7B099B05EEE, 0xF68DF78C8D8C7AF6,  
            0xFF0DE5170D17E8FF, 0xD6BDB7DCBDDC0AD6, 0xDEB1A7C8B1C816DE, 0x915439FC54FC6D91,  
            0x6050C0F050F09060, 0x0203040503050702, 0xCEA987E0A9E02ECE, 0x567DAC877D87D156,  
            0xE719D52B192BCCE7, 0xB56271A662A613B5, 0x4DE69A31E6317C4D, 0xEC9AC3B59AB559EC,  
            0x8F4505CF45CF408F, 0x1F9D3EBC9DBCA31F, 0x894009C040C04989, 0xFA87EF92879268FA,  
            0xEF15C53F153FD0EF, 0xB2EB7F26EB2694B2, 0x8EC90740C940CE8E, 0xFB0BED1D0B1DE6FB,  
            0x41EC822FEC2F6E41, 0xB3677DA967A91AB3, 0x5FFDBE1CFD1C435F, 0x45EA8A25EA256045,  
            0x23BF46DABFDAF923, 0x53F7A602F7025153, 0xE496D3A196A145E4, 0x9B5B2DED5BED769B,  
            0x75C2EA5DC25D2875, 0xE11CD9241C24C5E1, 0x3DAE7AE9AEE9D43D, 0x4C6A98BE6ABEF24C,  
            0x6C5AD8EE5AEE826C, 0x7E41FCC341C3BD7E, 0xF502F1060206F3F5, 0x834F1DD14FD15283,  
            0x685CD0E45CE48C68, 0x51F4A207F4075651, 0xD134B95C345C8DD1, 0xF908E9180818E1F9,  
            0xE293DFAE93AE4CE2, 0xAB734D9573953EAB, 0x6253C4F553F59762, 0x2A3F54413F416B2A,  
            0x080C10140C141C08, 0x955231F652F66395, 0x46658CAF65AFE946, 0x9D5E21E25EE27F9D,  
            0x3028607828784830, 0x37A16EF8A1F8CF37, 0x0A0F14110F111B0A, 0x2FB55EC4B5C4EB2F,  
            0x0E091C1B091B150E, 0x2436485A365A7E24, 0x1B9B36B69BB6AD1B, 0xDF3DA5473D4798DF,  
            0xCD26816A266AA7CD, 0x4E699CBB69BBF54E, 0x7FCDFE4CCD4C337F, 0xEA9FCFBA9FBA50EA,  
            0x121B242D1B2D3F12, 0x1D9E3AB99EB9A41D, 0x5874B09C749CC458, 0x342E68722E724634,  
            0x362D6C772D774136, 0xDCB2A3CDB2CD11DC, 0xB4EE7329EE299DB4, 0x5BFBB616FB164D5B,  
            0xA4F65301F601A5A4, 0x764DECD74DD7A176, 0xB76175A361A314B7, 0x7DCEFA49CE49347D,  
            0x527BA48D7B8DDF52, 0xDD3EA1423E429FDD, 0x5E71BC937193CD5E, 0x139726A297A2B113,  
            0xA6F55704F504A2A6, 0xB96869B868B801B9, 0x0000000000000000, 0xC12C99742C74B5C1,  
            0x406080A060A0E040, 0xE31FDD211F21C2E3, 0x79C8F243C8433A79, 0xB6ED772CED2C9AB6,  
            0xD4BEB3D9BED90DD4, 0x8D4601CA46CA478D, 0x67D9CE70D9701767, 0x724BE4DD4BDDAF72,  
            0x94DE3379DE79ED94, 0x98D42B67D467FF98, 0xB0E87B23E82393B0, 0x854A11DE4ADE5B85,  
            0xBB6B6DBD6BBD06BB, 0xC52A917E2A7EBBC5, 0x4FE59E34E5347B4F, 0xED16C13A163AD7ED,  
            0x86C51754C554D286, 0x9AD72F62D762F89A, 0x6655CCFF55FF9966, 0x119422A794A7B611,  
            0x8ACF0F4ACF4AC08A, 0xE910C9301030D9E9, 0x0406080A060A0E04, 0xFE81E798819866FE,  
            0xA0F05B0BF00BABA0, 0x7844F0CC44CCB478, 0x25BA4AD5BAD5F025, 0x4BE3963EE33E754B,  
            0xA2F35F0EF30EACA2, 0x5DFEBA19FE19445D, 0x80C01B5BC05BDB80, 0x058A0A858A858005,  
            0x3FAD7EECADECD33F, 0x21BC42DFBCDFFE21, 0x7048E0D848D8A870, 0xF104F90C040CFDF1,  
            0x63DFC67ADF7A1963, 0x77C1EE58C1582F77, 0xAF75459F759F30AF, 0x426384A563A5E742,  
            0x2030405030507020, 0xE51AD12E1A2ECBE5, 0xFD0EE1120E12EFFD, 0xBF6D65B76DB708BF,  
            0x814C19D44CD45581, 0x1814303C143C2418, 0x26354C5F355F7926, 0xC32F9D712F71B2C3,  
            0xBEE16738E13886BE, 0x35A26AFDA2FDC835, 0x88CC0B4FCC4FC788, 0x2E395C4B394B652E,  
            0x93573DF957F96A93, 0x55F2AA0DF20D5855, 0xFC82E39D829D61FC, 0x7A47F4C947C9B37A,  
            0xC8AC8BEFACEF27C8, 0xBAE76F32E73288BA, 0x322B647D2B7D4F32, 0xE695D7A495A442E6,  
            0xC0A09BFBA0FB3BC0, 0x199832B398B3AA19, 0x9ED12768D168F69E, 0xA37F5D817F8122A3,  
            0x446688AA66AAEE44, 0x547EA8827E82D654, 0x3BAB76E6ABE6DD3B, 0x0B83169E839E950B,  
            0x8CCA0345CA45C98C, 0xC729957B297BBCC7, 0x6BD3D66ED36E056B, 0x283C50443C446C28,  
            0xA779558B798B2CA7, 0xBCE2633DE23D81BC, 0x161D2C271D273116, 0xAD76419A769A37AD,  
            0xDB3BAD4D3B4D96DB, 0x6456C8FA56FA9E64, 0x744EE8D24ED2A674, 0x141E28221E223614,  
            0x92DB3F76DB76E492, 0x0C0A181E0A1E120C, 0x486C90B46CB4FC48, 0xB8E46B37E4378FB8,  
            0x9F5D25E75DE7789F, 0xBD6E61B26EB20FBD, 0x43EF862AEF2A6943, 0xC4A693F1A6F135C4,  
            0x39A872E3A8E3DA39, 0x31A462F7A4F7C631, 0xD337BD5937598AD3, 0xF28BFF868B8674F2,  
            0xD532B156325683D5, 0x8B430DC543C54E8B, 0x6E59DCEB59EB856E, 0xDAB7AFC2B7C218DA,  
            0x018C028F8C8F8E01, 0xB16479AC64AC1DB1, 0x9CD2236DD26DF19C, 0x49E0923BE03B7249,  
            0xD8B4ABC7B4C71FD8, 0xACFA4315FA15B9AC, 0xF307FD090709FAF3, 0xCF25856F256FA0CF,  
            0xCAAF8FEAAFEA20CA, 0xF48EF3898E897DF4, 0x47E98E20E9206747, 0x1018202818283810,  
            0x6FD5DE64D5640B6F, 0xF088FB83888373F0, 0x4A6F94B16FB1FB4A, 0x5C72B8967296CA5C,  
            0x3824706C246C5438, 0x57F1AE08F1085F57, 0x73C7E652C7522173, 0x975135F351F36497,  
            0xCB238D652365AECB, 0xA17C59847C8425A1, 0xE89CCBBF9CBF57E8, 0x3E217C6321635D3E,  
            0x96DD377CDD7CEA96, 0x61DCC27FDC7F1E61, 0x0D861A9186919C0D, 0x0F851E9485949B0F,  
            0xE090DBAB90AB4BE0, 0x7C42F8C642C6BA7C, 0x71C4E257C4572671, 0xCCAA83E5AAE529CC,  
            0x90D83B73D873E390, 0x06050C0F050F0906, 0xF701F5030103F4F7, 0x1C12383612362A1C,  
            0xC2A39FFEA3FE3CC2, 0x6A5FD4E15FE18B6A, 0xAEF94710F910BEAE, 0x69D0D26BD06B0269,  
            0x17912EA891A8BF17, 0x995829E858E87199, 0x3A2774692769533A, 0x27B94ED0B9D0F727,  
            0xD938A948384891D9, 0xEB13CD351335DEEB, 0x2BB356CEB3CEE52B, 0x2233445533557722,  
            0xD2BBBFD6BBD604D2, 0xA9704990709039A9, 0x07890E8089808707, 0x33A766F2A7F2C133,  
            0x2DB65AC1B6C1EC2D, 0x3C22786622665A3C, 0x15922AAD92ADB815, 0xC92089602060A9C9,  
            0x874915DB49DB5C87, 0xAAFF4F1AFF1AB0AA, 0x5078A0887888D850, 0xA57A518E7A8E2BA5,  
            0x038F068A8F8A8903, 0x59F8B213F8134A59, 0x0980129B809B9209, 0x1A1734391739231A,  
            0x65DACA75DA751065, 0xD731B553315384D7, 0x84C61351C651D584, 0xD0B8BBD3B8D303D0,  
            0x82C31F5EC35EDC82, 0x29B052CBB0CBE229, 0x5A77B4997799C35A, 0x1E113C3311332D1E,  
            0x7BCBF646CB463D7B, 0xA8FC4B1FFC1FB7A8, 0x6DD6DA61D6610C6D, 0x2C3A584E3A4E622C,  

            0xA597F4A5F432C6C6, 0x84EB9784976FF8F8, 0x99C7B099B05EEEEE, 0x8DF78C8D8C7AF6F6,  
            0x0DE5170D17E8FFFF, 0xBDB7DCBDDC0AD6D6, 0xB1A7C8B1C816DEDE, 0x5439FC54FC6D9191,  
            0x50C0F050F0906060, 0x0304050305070202, 0xA987E0A9E02ECECE, 0x7DAC877D87D15656,  
            0x19D52B192BCCE7E7, 0x6271A662A613B5B5, 0xE69A31E6317C4D4D, 0x9AC3B59AB559ECEC,  
            0x4505CF45CF408F8F, 0x9D3EBC9DBCA31F1F, 0x4009C040C0498989, 0x87EF92879268FAFA,  
            0x15C53F153FD0EFEF, 0xEB7F26EB2694B2B2, 0xC90740C940CE8E8E, 0x0BED1D0B1DE6FBFB,  
            0xEC822FEC2F6E4141, 0x677DA967A91AB3B3, 0xFDBE1CFD1C435F5F, 0xEA8A25EA25604545,  
            0xBF46DABFDAF92323, 0xF7A602F702515353, 0x96D3A196A145E4E4, 0x5B2DED5BED769B9B,  
            0xC2EA5DC25D287575, 0x1CD9241C24C5E1E1, 0xAE7AE9AEE9D43D3D, 0x6A98BE6ABEF24C4C,  
            0x5AD8EE5AEE826C6C, 0x41FCC341C3BD7E7E, 0x02F1060206F3F5F5, 0x4F1DD14FD1528383,  
            0x5CD0E45CE48C6868, 0xF4A207F407565151, 0x34B95C345C8DD1D1, 0x08E9180818E1F9F9,  
            0x93DFAE93AE4CE2E2, 0x734D9573953EABAB, 0x53C4F553F5976262, 0x3F54413F416B2A2A,  
            0x0C10140C141C0808, 0x5231F652F6639595, 0x658CAF65AFE94646, 0x5E21E25EE27F9D9D,  
            0x2860782878483030, 0xA16EF8A1F8CF3737, 0x0F14110F111B0A0A, 0xB55EC4B5C4EB2F2F,  
            0x091C1B091B150E0E, 0x36485A365A7E2424, 0x9B36B69BB6AD1B1B, 0x3DA5473D4798DFDF,  
            0x26816A266AA7CDCD, 0x699CBB69BBF54E4E, 0xCDFE4CCD4C337F7F, 0x9FCFBA9FBA50EAEA,  
            0x1B242D1B2D3F1212, 0x9E3AB99EB9A41D1D, 0x74B09C749CC45858, 0x2E68722E72463434,  
            0x2D6C772D77413636, 0xB2A3CDB2CD11DCDC, 0xEE7329EE299DB4B4, 0xFBB616FB164D5B5B,  
            0xF65301F601A5A4A4, 0x4DECD74DD7A17676, 0x6175A361A314B7B7, 0xCEFA49CE49347D7D,  
            0x7BA48D7B8DDF5252, 0x3EA1423E429FDDDD, 0x71BC937193CD5E5E, 0x9726A297A2B11313,  
            0xF55704F504A2A6A6, 0x6869B868B801B9B9, 0x0000000000000000, 0x2C99742C74B5C1C1,  
            0x6080A060A0E04040, 0x1FDD211F21C2E3E3, 0xC8F243C8433A7979, 0xED772CED2C9AB6B6,  
            0xBEB3D9BED90DD4D4, 0x4601CA46CA478D8D, 0xD9CE70D970176767, 0x4BE4DD4BDDAF7272,  
            0xDE3379DE79ED9494, 0xD42B67D467FF9898, 0xE87B23E82393B0B0, 0x4A11DE4ADE5B8585,  
            0x6B6DBD6BBD06BBBB, 0x2A917E2A7EBBC5C5, 0xE59E34E5347B4F4F, 0x16C13A163AD7EDED,  
            0xC51754C554D28686, 0xD72F62D762F89A9A, 0x55CCFF55FF996666, 0x9422A794A7B61111,  
            0xCF0F4ACF4AC08A8A, 0x10C9301030D9E9E9, 0x06080A060A0E0404, 0x81E798819866FEFE,  
            0xF05B0BF00BABA0A0, 0x44F0CC44CCB47878, 0xBA4AD5BAD5F02525, 0xE3963EE33E754B4B,  
            0xF35F0EF30EACA2A2, 0xFEBA19FE19445D5D, 0xC01B5BC05BDB8080, 0x8A0A858A85800505,  
            0xAD7EECADECD33F3F, 0xBC42DFBCDFFE2121, 0x48E0D848D8A87070, 0x04F90C040CFDF1F1,  
            0xDFC67ADF7A196363, 0xC1EE58C1582F7777, 0x75459F759F30AFAF, 0x6384A563A5E74242,  
            0x3040503050702020, 0x1AD12E1A2ECBE5E5, 0x0EE1120E12EFFDFD, 0x6D65B76DB708BFBF,  
            0x4C19D44CD4558181, 0x14303C143C241818, 0x354C5F355F792626, 0x2F9D712F71B2C3C3,  
            0xE16738E13886BEBE, 0xA26AFDA2FDC83535, 0xCC0B4FCC4FC78888, 0x395C4B394B652E2E,  
            0x573DF957F96A9393, 0xF2AA0DF20D585555, 0x82E39D829D61FCFC, 0x47F4C947C9B37A7A,  
            0xAC8BEFACEF27C8C8, 0xE76F32E73288BABA, 0x2B647D2B7D4F3232, 0x95D7A495A442E6E6,  
            0xA09BFBA0FB3BC0C0, 0x9832B398B3AA1919, 0xD12768D168F69E9E, 0x7F5D817F8122A3A3,  
            0x6688AA66AAEE4444, 0x7EA8827E82D65454, 0xAB76E6ABE6DD3B3B, 0x83169E839E950B0B,  
            0xCA0345CA45C98C8C, 0x29957B297BBCC7C7, 0xD3D66ED36E056B6B, 0x3C50443C446C2828,  
            0x79558B798B2CA7A7, 0xE2633DE23D81BCBC, 0x1D2C271D27311616, 0x76419A769A37ADAD,  
            0x3BAD4D3B4D96DBDB, 0x56C8FA56FA9E6464, 0x4EE8D24ED2A67474, 0x1E28221E22361414,  
            0xDB3F76DB76E49292, 0x0A181E0A1E120C0C, 0x6C90B46CB4FC4848, 0xE46B37E4378FB8B8,  
            0x5D25E75DE7789F9F, 0x6E61B26EB20FBDBD, 0xEF862AEF2A694343, 0xA693F1A6F135C4C4,  
            0xA872E3A8E3DA3939, 0xA462F7A4F7C63131, 0x37BD5937598AD3D3, 0x8BFF868B8674F2F2,  
            0x32B156325683D5D5, 0x430DC543C54E8B8B, 0x59DCEB59EB856E6E, 0xB7AFC2B7C218DADA,  
            0x8C028F8C8F8E0101, 0x6479AC64AC1DB1B1, 0xD2236DD26DF19C9C, 0xE0923BE03B724949,  
            0xB4ABC7B4C71FD8D8, 0xFA4315FA15B9ACAC, 0x07FD090709FAF3F3, 0x25856F256FA0CFCF,  
            0xAF8FEAAFEA20CACA, 0x8EF3898E897DF4F4, 0xE98E20E920674747, 0x1820281828381010,  
            0xD5DE64D5640B6F6F, 0x88FB83888373F0F0, 0x6F94B16FB1FB4A4A, 0x72B8967296CA5C5C,  
            0x24706C246C543838, 0xF1AE08F1085F5757, 0xC7E652C752217373, 0x5135F351F3649797,  
            0x238D652365AECBCB, 0x7C59847C8425A1A1, 0x9CCBBF9CBF57E8E8, 0x217C6321635D3E3E,  
            0xDD377CDD7CEA9696, 0xDCC27FDC7F1E6161, 0x861A9186919C0D0D, 0x851E9485949B0F0F,  
            0x90DBAB90AB4BE0E0, 0x42F8C642C6BA7C7C, 0xC4E257C457267171, 0xAA83E5AAE529CCCC,  
            0xD83B73D873E39090, 0x050C0F050F090606, 0x01F5030103F4F7F7, 0x12383612362A1C1C,  
            0xA39FFEA3FE3CC2C2, 0x5FD4E15FE18B6A6A, 0xF94710F910BEAEAE, 0xD0D26BD06B026969,  
            0x912EA891A8BF1717, 0x5829E858E8719999, 0x2774692769533A3A, 0xB94ED0B9D0F72727,  
            0x38A948384891D9D9, 0x13CD351335DEEBEB, 0xB356CEB3CEE52B2B, 0x3344553355772222,  
            0xBBBFD6BBD604D2D2, 0x704990709039A9A9, 0x890E808980870707, 0xA766F2A7F2C13333,  
            0xB65AC1B6C1EC2D2D, 0x22786622665A3C3C, 0x922AAD92ADB81515, 0x2089602060A9C9C9,  
            0x4915DB49DB5C8787, 0xFF4F1AFF1AB0AAAA, 0x78A0887888D85050, 0x7A518E7A8E2BA5A5,  
            0x8F068A8F8A890303, 0xF8B213F8134A5959, 0x80129B809B920909, 0x1734391739231A1A,  
            0xDACA75DA75106565, 0x31B553315384D7D7, 0xC61351C651D58484, 0xB8BBD3B8D303D0D0,  
            0xC31F5EC35EDC8282, 0xB052CBB0CBE22929, 0x77B4997799C35A5A, 0x113C3311332D1E1E,  
            0xCBF646CB463D7B7B, 0xFC4B1FFC1FB7A8A8, 0xD6DA61D6610C6D6D, 0x3A584E3A4E622C2C,  

            0x97F4A5F432C6C6A5, 0xEB9784976FF8F884, 0xC7B099B05EEEEE99, 0xF78C8D8C7AF6F68D,  
            0xE5170D17E8FFFF0D, 0xB7DCBDDC0AD6D6BD, 0xA7C8B1C816DEDEB1, 0x39FC54FC6D919154,  
            0xC0F050F090606050, 0x0405030507020203, 0x87E0A9E02ECECEA9, 0xAC877D87D156567D,  
            0xD52B192BCCE7E719, 0x71A662A613B5B562, 0x9A31E6317C4D4DE6, 0xC3B59AB559ECEC9A,  
            0x05CF45CF408F8F45, 0x3EBC9DBCA31F1F9D, 0x09C040C049898940, 0xEF92879268FAFA87,  
            0xC53F153FD0EFEF15, 0x7F26EB2694B2B2EB, 0x0740C940CE8E8EC9, 0xED1D0B1DE6FBFB0B,  
            0x822FEC2F6E4141EC, 0x7DA967A91AB3B367, 0xBE1CFD1C435F5FFD, 0x8A25EA25604545EA,  
            0x46DABFDAF92323BF, 0xA602F702515353F7, 0xD3A196A145E4E496, 0x2DED5BED769B9B5B,  
            0xEA5DC25D287575C2, 0xD9241C24C5E1E11C, 0x7AE9AEE9D43D3DAE, 0x98BE6ABEF24C4C6A,  
            0xD8EE5AEE826C6C5A, 0xFCC341C3BD7E7E41, 0xF1060206F3F5F502, 0x1DD14FD15283834F,  
            0xD0E45CE48C68685C, 0xA207F407565151F4, 0xB95C345C8DD1D134, 0xE9180818E1F9F908,  
            0xDFAE93AE4CE2E293, 0x4D9573953EABAB73, 0xC4F553F597626253, 0x54413F416B2A2A3F,  
            0x10140C141C08080C, 0x31F652F663959552, 0x8CAF65AFE9464665, 0x21E25EE27F9D9D5E,  
            0x6078287848303028, 0x6EF8A1F8CF3737A1, 0x14110F111B0A0A0F, 0x5EC4B5C4EB2F2FB5,  
            0x1C1B091B150E0E09, 0x485A365A7E242436, 0x36B69BB6AD1B1B9B, 0xA5473D4798DFDF3D,  
            0x816A266AA7CDCD26, 0x9CBB69BBF54E4E69, 0xFE4CCD4C337F7FCD, 0xCFBA9FBA50EAEA9F,  
            0x242D1B2D3F12121B, 0x3AB99EB9A41D1D9E, 0xB09C749CC4585874, 0x68722E724634342E,  
            0x6C772D774136362D, 0xA3CDB2CD11DCDCB2, 0x7329EE299DB4B4EE, 0xB616FB164D5B5BFB,  
            0x5301F601A5A4A4F6, 0xECD74DD7A176764D, 0x75A361A314B7B761, 0xFA49CE49347D7DCE,  
            0xA48D7B8DDF52527B, 0xA1423E429FDDDD3E, 0xBC937193CD5E5E71, 0x26A297A2B1131397,  
            0x5704F504A2A6A6F5, 0x69B868B801B9B968, 0x0000000000000000, 0x99742C74B5C1C12C,  
            0x80A060A0E0404060, 0xDD211F21C2E3E31F, 0xF243C8433A7979C8, 0x772CED2C9AB6B6ED,  
            0xB3D9BED90DD4D4BE, 0x01CA46CA478D8D46, 0xCE70D970176767D9, 0xE4DD4BDDAF72724B,  
            0x3379DE79ED9494DE, 0x2B67D467FF9898D4, 0x7B23E82393B0B0E8, 0x11DE4ADE5B85854A,  
            0x6DBD6BBD06BBBB6B, 0x917E2A7EBBC5C52A, 0x9E34E5347B4F4FE5, 0xC13A163AD7EDED16,  
            0x1754C554D28686C5, 0x2F62D762F89A9AD7, 0xCCFF55FF99666655, 0x22A794A7B6111194,  
            0x0F4ACF4AC08A8ACF, 0xC9301030D9E9E910, 0x080A060A0E040406, 0xE798819866FEFE81,  
            0x5B0BF00BABA0A0F0, 0xF0CC44CCB4787844, 0x4AD5BAD5F02525BA, 0x963EE33E754B4BE3,  
            0x5F0EF30EACA2A2F3, 0xBA19FE19445D5DFE, 0x1B5BC05BDB8080C0, 0x0A858A858005058A,  
            0x7EECADECD33F3FAD, 0x42DFBCDFFE2121BC, 0xE0D848D8A8707048, 0xF90C040CFDF1F104,  
            0xC67ADF7A196363DF, 0xEE58C1582F7777C1, 0x459F759F30AFAF75, 0x84A563A5E7424263,  
            0x4050305070202030, 0xD12E1A2ECBE5E51A, 0xE1120E12EFFDFD0E, 0x65B76DB708BFBF6D,  
            0x19D44CD45581814C, 0x303C143C24181814, 0x4C5F355F79262635, 0x9D712F71B2C3C32F,  
            0x6738E13886BEBEE1, 0x6AFDA2FDC83535A2, 0x0B4FCC4FC78888CC, 0x5C4B394B652E2E39,  
            0x3DF957F96A939357, 0xAA0DF20D585555F2, 0xE39D829D61FCFC82, 0xF4C947C9B37A7A47,  
            0x8BEFACEF27C8C8AC, 0x6F32E73288BABAE7, 0x647D2B7D4F32322B, 0xD7A495A442E6E695,  
            0x9BFBA0FB3BC0C0A0, 0x32B398B3AA191998, 0x2768D168F69E9ED1, 0x5D817F8122A3A37F,  
            0x88AA66AAEE444466, 0xA8827E82D654547E, 0x76E6ABE6DD3B3BAB, 0x169E839E950B0B83,  
            0x0345CA45C98C8CCA, 0x957B297BBCC7C729, 0xD66ED36E056B6BD3, 0x50443C446C28283C,  
            0x558B798B2CA7A779, 0x633DE23D81BCBCE2, 0x2C271D273116161D, 0x419A769A37ADAD76,  
            0xAD4D3B4D96DBDB3B, 0xC8FA56FA9E646456, 0xE8D24ED2A674744E, 0x28221E223614141E,  
            0x3F76DB76E49292DB, 0x181E0A1E120C0C0A, 0x90B46CB4FC48486C, 0x6B37E4378FB8B8E4,  
            0x25E75DE7789F9F5D, 0x61B26EB20FBDBD6E, 0x862AEF2A694343EF, 0x93F1A6F135C4C4A6,  
            0x72E3A8E3DA3939A8, 0x62F7A4F7C63131A4, 0xBD5937598AD3D337, 0xFF868B8674F2F28B,  
            0xB156325683D5D532, 0x0DC543C54E8B8B43, 0xDCEB59EB856E6E59, 0xAFC2B7C218DADAB7,  
            0x028F8C8F8E01018C, 0x79AC64AC1DB1B164, 0x236DD26DF19C9CD2, 0x923BE03B724949E0,  
            0xABC7B4C71FD8D8B4, 0x4315FA15B9ACACFA, 0xFD090709FAF3F307, 0x856F256FA0CFCF25,  
            0x8FEAAFEA20CACAAF, 0xF3898E897DF4F48E, 0x8E20E920674747E9, 0x2028182838101018,  
            0xDE64D5640B6F6FD5, 0xFB83888373F0F088, 0x94B16FB1FB4A4A6F, 0xB8967296CA5C5C72,  
            0x706C246C54383824, 0xAE08F1085F5757F1, 0xE652C752217373C7, 0x35F351F364979751,  
            0x8D652365AECBCB23, 0x59847C8425A1A17C, 0xCBBF9CBF57E8E89C, 0x7C6321635D3E3E21,  
            0x377CDD7CEA9696DD, 0xC27FDC7F1E6161DC, 0x1A9186919C0D0D86, 0x1E9485949B0F0F85,  
            0xDBAB90AB4BE0E090, 0xF8C642C6BA7C7C42, 0xE257C457267171C4, 0x83E5AAE529CCCCAA,  
            0x3B73D873E39090D8, 0x0C0F050F09060605, 0xF5030103F4F7F701, 0x383612362A1C1C12,  
            0x9FFEA3FE3CC2C2A3, 0xD4E15FE18B6A6A5F, 0x4710F910BEAEAEF9, 0xD26BD06B026969D0,  
            0x2EA891A8BF171791, 0x29E858E871999958, 0x74692769533A3A27, 0x4ED0B9D0F72727B9,  
            0xA948384891D9D938, 0xCD351335DEEBEB13, 0x56CEB3CEE52B2BB3, 0x4455335577222233,  
            0xBFD6BBD604D2D2BB, 0x4990709039A9A970, 0x0E80898087070789, 0x66F2A7F2C13333A7,  
            0x5AC1B6C1EC2D2DB6, 0x786622665A3C3C22, 0x2AAD92ADB8151592, 0x89602060A9C9C920,  
            0x15DB49DB5C878749, 0x4F1AFF1AB0AAAAFF, 0xA0887888D8505078, 0x518E7A8E2BA5A57A,  
            0x068A8F8A8903038F, 0xB213F8134A5959F8, 0x129B809B92090980, 0x34391739231A1A17,  
            0xCA75DA75106565DA, 0xB553315384D7D731, 0x1351C651D58484C6, 0xBBD3B8D303D0D0B8,  
            0x1F5EC35EDC8282C3, 0x52CBB0CBE22929B0, 0xB4997799C35A5A77, 0x3C3311332D1E1E11,  
            0xF646CB463D7B7BCB, 0x4B1FFC1FB7A8A8FC, 0xDA61D6610C6D6DD6, 0x584E3A4E622C2C3A,  

            0xF4A5F432C6C6A597, 0x9784976FF8F884EB, 0xB099B05EEEEE99C7, 0x8C8D8C7AF6F68DF7,  
            0x170D17E8FFFF0DE5, 0xDCBDDC0AD6D6BDB7, 0xC8B1C816DEDEB1A7, 0xFC54FC6D91915439,  
            0xF050F090606050C0, 0x0503050702020304, 0xE0A9E02ECECEA987, 0x877D87D156567DAC,  
            0x2B192BCCE7E719D5, 0xA662A613B5B56271, 0x31E6317C4D4DE69A, 0xB59AB559ECEC9AC3,  
            0xCF45CF408F8F4505, 0xBC9DBCA31F1F9D3E, 0xC040C04989894009, 0x92879268FAFA87EF,  
            0x3F153FD0EFEF15C5, 0x26EB2694B2B2EB7F, 0x40C940CE8E8EC907, 0x1D0B1DE6FBFB0BED,  
            0x2FEC2F6E4141EC82, 0xA967A91AB3B3677D, 0x1CFD1C435F5FFDBE, 0x25EA25604545EA8A,  
            0xDABFDAF92323BF46, 0x02F702515353F7A6, 0xA196A145E4E496D3, 0xED5BED769B9B5B2D,  
            0x5DC25D287575C2EA, 0x241C24C5E1E11CD9, 0xE9AEE9D43D3DAE7A, 0xBE6ABEF24C4C6A98,  
            0xEE5AEE826C6C5AD8, 0xC341C3BD7E7E41FC, 0x060206F3F5F502F1, 0xD14FD15283834F1D,  
            0xE45CE48C68685CD0, 0x07F407565151F4A2, 0x5C345C8DD1D134B9, 0x180818E1F9F908E9,  
            0xAE93AE4CE2E293DF, 0x9573953EABAB734D, 0xF553F597626253C4, 0x413F416B2A2A3F54,  
            0x140C141C08080C10, 0xF652F66395955231, 0xAF65AFE94646658C, 0xE25EE27F9D9D5E21,  
            0x7828784830302860, 0xF8A1F8CF3737A16E, 0x110F111B0A0A0F14, 0xC4B5C4EB2F2FB55E,  
            0x1B091B150E0E091C, 0x5A365A7E24243648, 0xB69BB6AD1B1B9B36, 0x473D4798DFDF3DA5,  
            0x6A266AA7CDCD2681, 0xBB69BBF54E4E699C, 0x4CCD4C337F7FCDFE, 0xBA9FBA50EAEA9FCF,  
            0x2D1B2D3F12121B24, 0xB99EB9A41D1D9E3A, 0x9C749CC4585874B0, 0x722E724634342E68,  
            0x772D774136362D6C, 0xCDB2CD11DCDCB2A3, 0x29EE299DB4B4EE73, 0x16FB164D5B5BFBB6,  
            0x01F601A5A4A4F653, 0xD74DD7A176764DEC, 0xA361A314B7B76175, 0x49CE49347D7DCEFA,  
            0x8D7B8DDF52527BA4, 0x423E429FDDDD3EA1, 0x937193CD5E5E71BC, 0xA297A2B113139726,  
            0x04F504A2A6A6F557, 0xB868B801B9B96869, 0x0000000000000000, 0x742C74B5C1C12C99,  
            0xA060A0E040406080, 0x211F21C2E3E31FDD, 0x43C8433A7979C8F2, 0x2CED2C9AB6B6ED77,  
            0xD9BED90DD4D4BEB3, 0xCA46CA478D8D4601, 0x70D970176767D9CE, 0xDD4BDDAF72724BE4,  
            0x79DE79ED9494DE33, 0x67D467FF9898D42B, 0x23E82393B0B0E87B, 0xDE4ADE5B85854A11,  
            0xBD6BBD06BBBB6B6D, 0x7E2A7EBBC5C52A91, 0x34E5347B4F4FE59E, 0x3A163AD7EDED16C1,  
            0x54C554D28686C517, 0x62D762F89A9AD72F, 0xFF55FF99666655CC, 0xA794A7B611119422,  
            0x4ACF4AC08A8ACF0F, 0x301030D9E9E910C9, 0x0A060A0E04040608, 0x98819866FEFE81E7,  
            0x0BF00BABA0A0F05B, 0xCC44CCB4787844F0, 0xD5BAD5F02525BA4A, 0x3EE33E754B4BE396,  
            0x0EF30EACA2A2F35F, 0x19FE19445D5DFEBA, 0x5BC05BDB8080C01B, 0x858A858005058A0A,  
            0xECADECD33F3FAD7E, 0xDFBCDFFE2121BC42, 0xD848D8A8707048E0, 0x0C040CFDF1F104F9,  
            0x7ADF7A196363DFC6, 0x58C1582F7777C1EE, 0x9F759F30AFAF7545, 0xA563A5E742426384,  
            0x5030507020203040, 0x2E1A2ECBE5E51AD1, 0x120E12EFFDFD0EE1, 0xB76DB708BFBF6D65,  
            0xD44CD45581814C19, 0x3C143C2418181430, 0x5F355F792626354C, 0x712F71B2C3C32F9D,  
            0x38E13886BEBEE167, 0xFDA2FDC83535A26A, 0x4FCC4FC78888CC0B, 0x4B394B652E2E395C,  
            0xF957F96A9393573D, 0x0DF20D585555F2AA, 0x9D829D61FCFC82E3, 0xC947C9B37A7A47F4,  
            0xEFACEF27C8C8AC8B, 0x32E73288BABAE76F, 0x7D2B7D4F32322B64, 0xA495A442E6E695D7,  
            0xFBA0FB3BC0C0A09B, 0xB398B3AA19199832, 0x68D168F69E9ED127, 0x817F8122A3A37F5D,  
            0xAA66AAEE44446688, 0x827E82D654547EA8, 0xE6ABE6DD3B3BAB76, 0x9E839E950B0B8316,  
            0x45CA45C98C8CCA03, 0x7B297BBCC7C72995, 0x6ED36E056B6BD3D6, 0x443C446C28283C50,  
            0x8B798B2CA7A77955, 0x3DE23D81BCBCE263, 0x271D273116161D2C, 0x9A769A37ADAD7641,  
            0x4D3B4D96DBDB3BAD, 0xFA56FA9E646456C8, 0xD24ED2A674744EE8, 0x221E223614141E28,  
            0x76DB76E49292DB3F, 0x1E0A1E120C0C0A18, 0xB46CB4FC48486C90, 0x37E4378FB8B8E46B,  
            0xE75DE7789F9F5D25, 0xB26EB20FBDBD6E61, 0x2AEF2A694343EF86, 0xF1A6F135C4C4A693,  
            0xE3A8E3DA3939A872, 0xF7A4F7C63131A462, 0x5937598AD3D337BD, 0x868B8674F2F28BFF,  
            0x56325683D5D532B1, 0xC543C54E8B8B430D, 0xEB59EB856E6E59DC, 0xC2B7C218DADAB7AF,  
            0x8F8C8F8E01018C02, 0xAC64AC1DB1B16479, 0x6DD26DF19C9CD223, 0x3BE03B724949E092,  
            0xC7B4C71FD8D8B4AB, 0x15FA15B9ACACFA43, 0x090709FAF3F307FD, 0x6F256FA0CFCF2585,  
            0xEAAFEA20CACAAF8F, 0x898E897DF4F48EF3, 0x20E920674747E98E, 0x2818283810101820,  
            0x64D5640B6F6FD5DE, 0x83888373F0F088FB, 0xB16FB1FB4A4A6F94, 0x967296CA5C5C72B8,  
            0x6C246C5438382470, 0x08F1085F5757F1AE, 0x52C752217373C7E6, 0xF351F36497975135,  
            0x652365AECBCB238D, 0x847C8425A1A17C59, 0xBF9CBF57E8E89CCB, 0x6321635D3E3E217C,  
            0x7CDD7CEA9696DD37, 0x7FDC7F1E6161DCC2, 0x9186919C0D0D861A, 0x9485949B0F0F851E,  
            0xAB90AB4BE0E090DB, 0xC642C6BA7C7C42F8, 0x57C457267171C4E2, 0xE5AAE529CCCCAA83,  
            0x73D873E39090D83B, 0x0F050F090606050C, 0x030103F4F7F701F5, 0x3612362A1C1C1238,  
            0xFEA3FE3CC2C2A39F, 0xE15FE18B6A6A5FD4, 0x10F910BEAEAEF947, 0x6BD06B026969D0D2,  
            0xA891A8BF1717912E, 0xE858E87199995829, 0x692769533A3A2774, 0xD0B9D0F72727B94E,  
            0x48384891D9D938A9, 0x351335DEEBEB13CD, 0xCEB3CEE52B2BB356, 0x5533557722223344,  
            0xD6BBD604D2D2BBBF, 0x90709039A9A97049, 0x808980870707890E, 0xF2A7F2C13333A766,  
            0xC1B6C1EC2D2DB65A, 0x6622665A3C3C2278, 0xAD92ADB81515922A, 0x602060A9C9C92089,  
            0xDB49DB5C87874915, 0x1AFF1AB0AAAAFF4F, 0x887888D8505078A0, 0x8E7A8E2BA5A57A51,  
            0x8A8F8A8903038F06, 0x13F8134A5959F8B2, 0x9B809B9209098012, 0x391739231A1A1734,  
            0x75DA75106565DACA, 0x53315384D7D731B5, 0x51C651D58484C613, 0xD3B8D303D0D0B8BB,  
            0x5EC35EDC8282C31F, 0xCBB0CBE22929B052, 0x997799C35A5A77B4, 0x3311332D1E1E113C,  
            0x46CB463D7B7BCBF6, 0x1FFC1FB7A8A8FC4B, 0x61D6610C6D6DD6DA, 0x4E3A4E622C2C3A58,  

            0xA5F432C6C6A597F4, 0x84976FF8F884EB97, 0x99B05EEEEE99C7B0, 0x8D8C7AF6F68DF78C,  
            0x0D17E8FFFF0DE517, 0xBDDC0AD6D6BDB7DC, 0xB1C816DEDEB1A7C8, 0x54FC6D91915439FC,  
            0x50F090606050C0F0, 0x0305070202030405, 0xA9E02ECECEA987E0, 0x7D87D156567DAC87,  
            0x192BCCE7E719D52B, 0x62A613B5B56271A6, 0xE6317C4D4DE69A31, 0x9AB559ECEC9AC3B5,  
            0x45CF408F8F4505CF, 0x9DBCA31F1F9D3EBC, 0x40C04989894009C0, 0x879268FAFA87EF92,  
            0x153FD0EFEF15C53F, 0xEB2694B2B2EB7F26, 0xC940CE8E8EC90740, 0x0B1DE6FBFB0BED1D,  
            0xEC2F6E4141EC822F, 0x67A91AB3B3677DA9, 0xFD1C435F5FFDBE1C, 0xEA25604545EA8A25,  
            0xBFDAF92323BF46DA, 0xF702515353F7A602, 0x96A145E4E496D3A1, 0x5BED769B9B5B2DED,  
            0xC25D287575C2EA5D, 0x1C24C5E1E11CD924, 0xAEE9D43D3DAE7AE9, 0x6ABEF24C4C6A98BE,  
            0x5AEE826C6C5AD8EE, 0x41C3BD7E7E41FCC3, 0x0206F3F5F502F106, 0x4FD15283834F1DD1,  
            0x5CE48C68685CD0E4, 0xF407565151F4A207, 0x345C8DD1D134B95C, 0x0818E1F9F908E918,  
            0x93AE4CE2E293DFAE, 0x73953EABAB734D95, 0x53F597626253C4F5, 0x3F416B2A2A3F5441,  
            0x0C141C08080C1014, 0x52F66395955231F6, 0x65AFE94646658CAF, 0x5EE27F9D9D5E21E2,  
            0x2878483030286078, 0xA1F8CF3737A16EF8, 0x0F111B0A0A0F1411, 0xB5C4EB2F2FB55EC4,  
            0x091B150E0E091C1B, 0x365A7E242436485A, 0x9BB6AD1B1B9B36B6, 0x3D4798DFDF3DA547,  
            0x266AA7CDCD26816A, 0x69BBF54E4E699CBB, 0xCD4C337F7FCDFE4C, 0x9FBA50EAEA9FCFBA,  
            0x1B2D3F12121B242D, 0x9EB9A41D1D9E3AB9, 0x749CC4585874B09C, 0x2E724634342E6872,  
            0x2D774136362D6C77, 0xB2CD11DCDCB2A3CD, 0xEE299DB4B4EE7329, 0xFB164D5B5BFBB616,  
            0xF601A5A4A4F65301, 0x4DD7A176764DECD7, 0x61A314B7B76175A3, 0xCE49347D7DCEFA49,  
            0x7B8DDF52527BA48D, 0x3E429FDDDD3EA142, 0x7193CD5E5E71BC93, 0x97A2B113139726A2,  
            0xF504A2A6A6F55704, 0x68B801B9B96869B8, 0x0000000000000000, 0x2C74B5C1C12C9974,  
            0x60A0E040406080A0, 0x1F21C2E3E31FDD21, 0xC8433A7979C8F243, 0xED2C9AB6B6ED772C,  
            0xBED90DD4D4BEB3D9, 0x46CA478D8D4601CA, 0xD970176767D9CE70, 0x4BDDAF72724BE4DD,  
            0xDE79ED9494DE3379, 0xD467FF9898D42B67, 0xE82393B0B0E87B23, 0x4ADE5B85854A11DE,  
            0x6BBD06BBBB6B6DBD, 0x2A7EBBC5C52A917E, 0xE5347B4F4FE59E34, 0x163AD7EDED16C13A,  
            0xC554D28686C51754, 0xD762F89A9AD72F62, 0x55FF99666655CCFF, 0x94A7B611119422A7,  
            0xCF4AC08A8ACF0F4A, 0x1030D9E9E910C930, 0x060A0E040406080A, 0x819866FEFE81E798,  
            0xF00BABA0A0F05B0B, 0x44CCB4787844F0CC, 0xBAD5F02525BA4AD5, 0xE33E754B4BE3963E,  
            0xF30EACA2A2F35F0E, 0xFE19445D5DFEBA19, 0xC05BDB8080C01B5B, 0x8A858005058A0A85,  
            0xADECD33F3FAD7EEC, 0xBCDFFE2121BC42DF, 0x48D8A8707048E0D8, 0x040CFDF1F104F90C,  
            0xDF7A196363DFC67A, 0xC1582F7777C1EE58, 0x759F30AFAF75459F, 0x63A5E742426384A5,  
            0x3050702020304050, 0x1A2ECBE5E51AD12E, 0x0E12EFFDFD0EE112, 0x6DB708BFBF6D65B7,  
            0x4CD45581814C19D4, 0x143C24181814303C, 0x355F792626354C5F, 0x2F71B2C3C32F9D71,  
            0xE13886BEBEE16738, 0xA2FDC83535A26AFD, 0xCC4FC78888CC0B4F, 0x394B652E2E395C4B,  
            0x57F96A9393573DF9, 0xF20D585555F2AA0D, 0x829D61FCFC82E39D, 0x47C9B37A7A47F4C9,  
            0xACEF27C8C8AC8BEF, 0xE73288BABAE76F32, 0x2B7D4F32322B647D, 0x95A442E6E695D7A4,  
            0xA0FB3BC0C0A09BFB, 0x98B3AA19199832B3, 0xD168F69E9ED12768, 0x7F8122A3A37F5D81,  
            0x66AAEE44446688AA, 0x7E82D654547EA882, 0xABE6DD3B3BAB76E6, 0x839E950B0B83169E,  
            0xCA45C98C8CCA0345, 0x297BBCC7C729957B, 0xD36E056B6BD3D66E, 0x3C446C28283C5044,  
            0x798B2CA7A779558B, 0xE23D81BCBCE2633D, 0x1D273116161D2C27, 0x769A37ADAD76419A,  
            0x3B4D96DBDB3BAD4D, 0x56FA9E646456C8FA, 0x4ED2A674744EE8D2, 0x1E223614141E2822,  
            0xDB76E49292DB3F76, 0x0A1E120C0C0A181E, 0x6CB4FC48486C90B4, 0xE4378FB8B8E46B37,  
            0x5DE7789F9F5D25E7, 0x6EB20FBDBD6E61B2, 0xEF2A694343EF862A, 0xA6F135C4C4A693F1,  
            0xA8E3DA3939A872E3, 0xA4F7C63131A462F7, 0x37598AD3D337BD59, 0x8B8674F2F28BFF86,  
            0x325683D5D532B156, 0x43C54E8B8B430DC5, 0x59EB856E6E59DCEB, 0xB7C218DADAB7AFC2,  
            0x8C8F8E01018C028F, 0x64AC1DB1B16479AC, 0xD26DF19C9CD2236D, 0xE03B724949E0923B,  
            0xB4C71FD8D8B4ABC7, 0xFA15B9ACACFA4315, 0x0709FAF3F307FD09, 0x256FA0CFCF25856F,  
            0xAFEA20CACAAF8FEA, 0x8E897DF4F48EF389, 0xE920674747E98E20, 0x1828381010182028,  
            0xD5640B6F6FD5DE64, 0x888373F0F088FB83, 0x6FB1FB4A4A6F94B1, 0x7296CA5C5C72B896,  
            0x246C54383824706C, 0xF1085F5757F1AE08, 0xC752217373C7E652, 0x51F36497975135F3,  
            0x2365AECBCB238D65, 0x7C8425A1A17C5984, 0x9CBF57E8E89CCBBF, 0x21635D3E3E217C63,  
            0xDD7CEA9696DD377C, 0xDC7F1E6161DCC27F, 0x86919C0D0D861A91, 0x85949B0F0F851E94,  
            0x90AB4BE0E090DBAB, 0x42C6BA7C7C42F8C6, 0xC457267171C4E257, 0xAAE529CCCCAA83E5,  
            0xD873E39090D83B73, 0x050F090606050C0F, 0x0103F4F7F701F503, 0x12362A1C1C123836,  
            0xA3FE3CC2C2A39FFE, 0x5FE18B6A6A5FD4E1, 0xF910BEAEAEF94710, 0xD06B026969D0D26B,  
            0x91A8BF1717912EA8, 0x58E87199995829E8, 0x2769533A3A277469, 0xB9D0F72727B94ED0,  
            0x384891D9D938A948, 0x1335DEEBEB13CD35, 0xB3CEE52B2BB356CE, 0x3355772222334455,  
            0xBBD604D2D2BBBFD6, 0x709039A9A9704990, 0x8980870707890E80, 0xA7F2C13333A766F2,  
            0xB6C1EC2D2DB65AC1, 0x22665A3C3C227866, 0x92ADB81515922AAD, 0x2060A9C9C9208960,  
            0x49DB5C87874915DB, 0xFF1AB0AAAAFF4F1A, 0x7888D8505078A088, 0x7A8E2BA5A57A518E,  
            0x8F8A8903038F068A, 0xF8134A5959F8B213, 0x809B92090980129B, 0x1739231A1A173439,  
            0xDA75106565DACA75, 0x315384D7D731B553, 0xC651D58484C61351, 0xB8D303D0D0B8BBD3,  
            0xC35EDC8282C31F5E, 0xB0CBE22929B052CB, 0x7799C35A5A77B499, 0x11332D1E1E113C33,  
            0xCB463D7B7BCBF646, 0xFC1FB7A8A8FC4B1F, 0xD6610C6D6DD6DA61, 0x3A4E622C2C3A584E,  

            0xF432C6C6A597F4A5, 0x976FF8F884EB9784, 0xB05EEEEE99C7B099, 0x8C7AF6F68DF78C8D,  
            0x17E8FFFF0DE5170D, 0xDC0AD6D6BDB7DCBD, 0xC816DEDEB1A7C8B1, 0xFC6D91915439FC54,  
            0xF090606050C0F050, 0x0507020203040503, 0xE02ECECEA987E0A9, 0x87D156567DAC877D,  
            0x2BCCE7E719D52B19, 0xA613B5B56271A662, 0x317C4D4DE69A31E6, 0xB559ECEC9AC3B59A,  
            0xCF408F8F4505CF45, 0xBCA31F1F9D3EBC9D, 0xC04989894009C040, 0x9268FAFA87EF9287,  
            0x3FD0EFEF15C53F15, 0x2694B2B2EB7F26EB, 0x40CE8E8EC90740C9, 0x1DE6FBFB0BED1D0B,  
            0x2F6E4141EC822FEC, 0xA91AB3B3677DA967, 0x1C435F5FFDBE1CFD, 0x25604545EA8A25EA,  
            0xDAF92323BF46DABF, 0x02515353F7A602F7, 0xA145E4E496D3A196, 0xED769B9B5B2DED5B,  
            0x5D287575C2EA5DC2, 0x24C5E1E11CD9241C, 0xE9D43D3DAE7AE9AE, 0xBEF24C4C6A98BE6A,  
            0xEE826C6C5AD8EE5A, 0xC3BD7E7E41FCC341, 0x06F3F5F502F10602, 0xD15283834F1DD14F,  
            0xE48C68685CD0E45C, 0x07565151F4A207F4, 0x5C8DD1D134B95C34, 0x18E1F9F908E91808,  
            0xAE4CE2E293DFAE93, 0x953EABAB734D9573, 0xF597626253C4F553, 0x416B2A2A3F54413F,  
            0x141C08080C10140C, 0xF66395955231F652, 0xAFE94646658CAF65, 0xE27F9D9D5E21E25E,  
            0x7848303028607828, 0xF8CF3737A16EF8A1, 0x111B0A0A0F14110F, 0xC4EB2F2FB55EC4B5,  
            0x1B150E0E091C1B09, 0x5A7E242436485A36, 0xB6AD1B1B9B36B69B, 0x4798DFDF3DA5473D,  
            0x6AA7CDCD26816A26, 0xBBF54E4E699CBB69, 0x4C337F7FCDFE4CCD, 0xBA50EAEA9FCFBA9F,  
            0x2D3F12121B242D1B, 0xB9A41D1D9E3AB99E, 0x9CC4585874B09C74, 0x724634342E68722E,  
            0x774136362D6C772D, 0xCD11DCDCB2A3CDB2, 0x299DB4B4EE7329EE, 0x164D5B5BFBB616FB,  
            0x01A5A4A4F65301F6, 0xD7A176764DECD74D, 0xA314B7B76175A361, 0x49347D7DCEFA49CE,  
            0x8DDF52527BA48D7B, 0x429FDDDD3EA1423E, 0x93CD5E5E71BC9371, 0xA2B113139726A297,  
            0x04A2A6A6F55704F5, 0xB801B9B96869B868, 0x0000000000000000, 0x74B5C1C12C99742C,  
            0xA0E040406080A060, 0x21C2E3E31FDD211F, 0x433A7979C8F243C8, 0x2C9AB6B6ED772CED,  
            0xD90DD4D4BEB3D9BE, 0xCA478D8D4601CA46, 0x70176767D9CE70D9, 0xDDAF72724BE4DD4B,  
            0x79ED9494DE3379DE, 0x67FF9898D42B67D4, 0x2393B0B0E87B23E8, 0xDE5B85854A11DE4A,  
            0xBD06BBBB6B6DBD6B, 0x7EBBC5C52A917E2A, 0x347B4F4FE59E34E5, 0x3AD7EDED16C13A16,  
            0x54D28686C51754C5, 0x62F89A9AD72F62D7, 0xFF99666655CCFF55, 0xA7B611119422A794,  
            0x4AC08A8ACF0F4ACF, 0x30D9E9E910C93010, 0x0A0E040406080A06, 0x9866FEFE81E79881,  
            0x0BABA0A0F05B0BF0, 0xCCB4787844F0CC44, 0xD5F02525BA4AD5BA, 0x3E754B4BE3963EE3,  
            0x0EACA2A2F35F0EF3, 0x19445D5DFEBA19FE, 0x5BDB8080C01B5BC0, 0x858005058A0A858A,  
            0xECD33F3FAD7EECAD, 0xDFFE2121BC42DFBC, 0xD8A8707048E0D848, 0x0CFDF1F104F90C04,  
            0x7A196363DFC67ADF, 0x582F7777C1EE58C1, 0x9F30AFAF75459F75, 0xA5E742426384A563,  
            0x5070202030405030, 0x2ECBE5E51AD12E1A, 0x12EFFDFD0EE1120E, 0xB708BFBF6D65B76D,  
            0xD45581814C19D44C, 0x3C24181814303C14, 0x5F792626354C5F35, 0x71B2C3C32F9D712F,  
            0x3886BEBEE16738E1, 0xFDC83535A26AFDA2, 0x4FC78888CC0B4FCC, 0x4B652E2E395C4B39,  
            0xF96A9393573DF957, 0x0D585555F2AA0DF2, 0x9D61FCFC82E39D82, 0xC9B37A7A47F4C947,  
            0xEF27C8C8AC8BEFAC, 0x3288BABAE76F32E7, 0x7D4F32322B647D2B, 0xA442E6E695D7A495,  
            0xFB3BC0C0A09BFBA0, 0xB3AA19199832B398, 0x68F69E9ED12768D1, 0x8122A3A37F5D817F,  
            0xAAEE44446688AA66, 0x82D654547EA8827E, 0xE6DD3B3BAB76E6AB, 0x9E950B0B83169E83,  
            0x45C98C8CCA0345CA, 0x7BBCC7C729957B29, 0x6E056B6BD3D66ED3, 0x446C28283C50443C,  
            0x8B2CA7A779558B79, 0x3D81BCBCE2633DE2, 0x273116161D2C271D, 0x9A37ADAD76419A76,  
            0x4D96DBDB3BAD4D3B, 0xFA9E646456C8FA56, 0xD2A674744EE8D24E, 0x223614141E28221E,  
            0x76E49292DB3F76DB, 0x1E120C0C0A181E0A, 0xB4FC48486C90B46C, 0x378FB8B8E46B37E4,  
            0xE7789F9F5D25E75D, 0xB20FBDBD6E61B26E, 0x2A694343EF862AEF, 0xF135C4C4A693F1A6,  
            0xE3DA3939A872E3A8, 0xF7C63131A462F7A4, 0x598AD3D337BD5937, 0x8674F2F28BFF868B,  
            0x5683D5D532B15632, 0xC54E8B8B430DC543, 0xEB856E6E59DCEB59, 0xC218DADAB7AFC2B7,  
            0x8F8E01018C028F8C, 0xAC1DB1B16479AC64, 0x6DF19C9CD2236DD2, 0x3B724949E0923BE0,  
            0xC71FD8D8B4ABC7B4, 0x15B9ACACFA4315FA, 0x09FAF3F307FD0907, 0x6FA0CFCF25856F25,  
            0xEA20CACAAF8FEAAF, 0x897DF4F48EF3898E, 0x20674747E98E20E9, 0x2838101018202818,  
            0x640B6F6FD5DE64D5, 0x8373F0F088FB8388, 0xB1FB4A4A6F94B16F, 0x96CA5C5C72B89672,  
            0x6C54383824706C24, 0x085F5757F1AE08F1, 0x52217373C7E652C7, 0xF36497975135F351,  
            0x65AECBCB238D6523, 0x8425A1A17C59847C, 0xBF57E8E89CCBBF9C, 0x635D3E3E217C6321,  
            0x7CEA9696DD377CDD, 0x7F1E6161DCC27FDC, 0x919C0D0D861A9186, 0x949B0F0F851E9485,  
            0xAB4BE0E090DBAB90, 0xC6BA7C7C42F8C642, 0x57267171C4E257C4, 0xE529CCCCAA83E5AA,  
            0x73E39090D83B73D8, 0x0F090606050C0F05, 0x03F4F7F701F50301, 0x362A1C1C12383612,  
            0xFE3CC2C2A39FFEA3, 0xE18B6A6A5FD4E15F, 0x10BEAEAEF94710F9, 0x6B026969D0D26BD0,  
            0xA8BF1717912EA891, 0xE87199995829E858, 0x69533A3A27746927, 0xD0F72727B94ED0B9,  
            0x4891D9D938A94838, 0x35DEEBEB13CD3513, 0xCEE52B2BB356CEB3, 0x5577222233445533,  
            0xD604D2D2BBBFD6BB, 0x9039A9A970499070, 0x80870707890E8089, 0xF2C13333A766F2A7,  
            0xC1EC2D2DB65AC1B6, 0x665A3C3C22786622, 0xADB81515922AAD92, 0x60A9C9C920896020,  
            0xDB5C87874915DB49, 0x1AB0AAAAFF4F1AFF, 0x88D8505078A08878, 0x8E2BA5A57A518E7A,  
            0x8A8903038F068A8F, 0x134A5959F8B213F8, 0x9B92090980129B80, 0x39231A1A17343917,  
            0x75106565DACA75DA, 0x5384D7D731B55331, 0x51D58484C61351C6, 0xD303D0D0B8BBD3B8,  
            0x5EDC8282C31F5EC3, 0xCBE22929B052CBB0, 0x99C35A5A77B49977, 0x332D1E1E113C3311,  
            0x463D7B7BCBF646CB, 0x1FB7A8A8FC4B1FFC, 0x610C6D6DD6DA61D6, 0x4E622C2C3A584E3A,  

            0x32C6C6A597F4A5F4, 0x6FF8F884EB978497, 0x5EEEEE99C7B099B0, 0x7AF6F68DF78C8D8C,  
            0xE8FFFF0DE5170D17, 0x0AD6D6BDB7DCBDDC, 0x16DEDEB1A7C8B1C8, 0x6D91915439FC54FC,  
            0x90606050C0F050F0, 0x0702020304050305, 0x2ECECEA987E0A9E0, 0xD156567DAC877D87,  
            0xCCE7E719D52B192B, 0x13B5B56271A662A6, 0x7C4D4DE69A31E631, 0x59ECEC9AC3B59AB5,  
            0x408F8F4505CF45CF, 0xA31F1F9D3EBC9DBC, 0x4989894009C040C0, 0x68FAFA87EF928792,  
            0xD0EFEF15C53F153F, 0x94B2B2EB7F26EB26, 0xCE8E8EC90740C940, 0xE6FBFB0BED1D0B1D,  
            0x6E4141EC822FEC2F, 0x1AB3B3677DA967A9, 0x435F5FFDBE1CFD1C, 0x604545EA8A25EA25,  
            0xF92323BF46DABFDA, 0x515353F7A602F702, 0x45E4E496D3A196A1, 0x769B9B5B2DED5BED,  
            0x287575C2EA5DC25D, 0xC5E1E11CD9241C24, 0xD43D3DAE7AE9AEE9, 0xF24C4C6A98BE6ABE,  
            0x826C6C5AD8EE5AEE, 0xBD7E7E41FCC341C3, 0xF3F5F502F1060206, 0x5283834F1DD14FD1,  
            0x8C68685CD0E45CE4, 0x565151F4A207F407, 0x8DD1D134B95C345C, 0xE1F9F908E9180818,  
            0x4CE2E293DFAE93AE, 0x3EABAB734D957395, 0x97626253C4F553F5, 0x6B2A2A3F54413F41,  
            0x1C08080C10140C14, 0x6395955231F652F6, 0xE94646658CAF65AF, 0x7F9D9D5E21E25EE2,  
            0x4830302860782878, 0xCF3737A16EF8A1F8, 0x1B0A0A0F14110F11, 0xEB2F2FB55EC4B5C4,  
            0x150E0E091C1B091B, 0x7E242436485A365A, 0xAD1B1B9B36B69BB6, 0x98DFDF3DA5473D47,  
            0xA7CDCD26816A266A, 0xF54E4E699CBB69BB, 0x337F7FCDFE4CCD4C, 0x50EAEA9FCFBA9FBA,  
            0x3F12121B242D1B2D, 0xA41D1D9E3AB99EB9, 0xC4585874B09C749C, 0x4634342E68722E72,  
            0x4136362D6C772D77, 0x11DCDCB2A3CDB2CD, 0x9DB4B4EE7329EE29, 0x4D5B5BFBB616FB16,  
            0xA5A4A4F65301F601, 0xA176764DECD74DD7, 0x14B7B76175A361A3, 0x347D7DCEFA49CE49,  
            0xDF52527BA48D7B8D, 0x9FDDDD3EA1423E42, 0xCD5E5E71BC937193, 0xB113139726A297A2,  
            0xA2A6A6F55704F504, 0x01B9B96869B868B8, 0x0000000000000000, 0xB5C1C12C99742C74,  
            0xE040406080A060A0, 0xC2E3E31FDD211F21, 0x3A7979C8F243C843, 0x9AB6B6ED772CED2C,  
            0x0DD4D4BEB3D9BED9, 0x478D8D4601CA46CA, 0x176767D9CE70D970, 0xAF72724BE4DD4BDD,  
            0xED9494DE3379DE79, 0xFF9898D42B67D467, 0x93B0B0E87B23E823, 0x5B85854A11DE4ADE,  
            0x06BBBB6B6DBD6BBD, 0xBBC5C52A917E2A7E, 0x7B4F4FE59E34E534, 0xD7EDED16C13A163A,  
            0xD28686C51754C554, 0xF89A9AD72F62D762, 0x99666655CCFF55FF, 0xB611119422A794A7,  
            0xC08A8ACF0F4ACF4A, 0xD9E9E910C9301030, 0x0E040406080A060A, 0x66FEFE81E7988198,  
            0xABA0A0F05B0BF00B, 0xB4787844F0CC44CC, 0xF02525BA4AD5BAD5, 0x754B4BE3963EE33E,  
            0xACA2A2F35F0EF30E, 0x445D5DFEBA19FE19, 0xDB8080C01B5BC05B, 0x8005058A0A858A85,  
            0xD33F3FAD7EECADEC, 0xFE2121BC42DFBCDF, 0xA8707048E0D848D8, 0xFDF1F104F90C040C,  
            0x196363DFC67ADF7A, 0x2F7777C1EE58C158, 0x30AFAF75459F759F, 0xE742426384A563A5,  
            0x7020203040503050, 0xCBE5E51AD12E1A2E, 0xEFFDFD0EE1120E12, 0x08BFBF6D65B76DB7,  
            0x5581814C19D44CD4, 0x24181814303C143C, 0x792626354C5F355F, 0xB2C3C32F9D712F71,  
            0x86BEBEE16738E138, 0xC83535A26AFDA2FD, 0xC78888CC0B4FCC4F, 0x652E2E395C4B394B,  
            0x6A9393573DF957F9, 0x585555F2AA0DF20D, 0x61FCFC82E39D829D, 0xB37A7A47F4C947C9,  
            0x27C8C8AC8BEFACEF, 0x88BABAE76F32E732, 0x4F32322B647D2B7D, 0x42E6E695D7A495A4,  
            0x3BC0C0A09BFBA0FB, 0xAA19199832B398B3, 0xF69E9ED12768D168, 0x22A3A37F5D817F81,  
            0xEE44446688AA66AA, 0xD654547EA8827E82, 0xDD3B3BAB76E6ABE6, 0x950B0B83169E839E,  
            0xC98C8CCA0345CA45, 0xBCC7C729957B297B, 0x056B6BD3D66ED36E, 0x6C28283C50443C44,  
            0x2CA7A779558B798B, 0x81BCBCE2633DE23D, 0x3116161D2C271D27, 0x37ADAD76419A769A,  
            0x96DBDB3BAD4D3B4D, 0x9E646456C8FA56FA, 0xA674744EE8D24ED2, 0x3614141E28221E22,  
            0xE49292DB3F76DB76, 0x120C0C0A181E0A1E, 0xFC48486C90B46CB4, 0x8FB8B8E46B37E437,  
            0x789F9F5D25E75DE7, 0x0FBDBD6E61B26EB2, 0x694343EF862AEF2A, 0x35C4C4A693F1A6F1,  
            0xDA3939A872E3A8E3, 0xC63131A462F7A4F7, 0x8AD3D337BD593759, 0x74F2F28BFF868B86,  
            0x83D5D532B1563256, 0x4E8B8B430DC543C5, 0x856E6E59DCEB59EB, 0x18DADAB7AFC2B7C2,  
            0x8E01018C028F8C8F, 0x1DB1B16479AC64AC, 0xF19C9CD2236DD26D, 0x724949E0923BE03B,  
            0x1FD8D8B4ABC7B4C7, 0xB9ACACFA4315FA15, 0xFAF3F307FD090709, 0xA0CFCF25856F256F,  
            0x20CACAAF8FEAAFEA, 0x7DF4F48EF3898E89, 0x674747E98E20E920, 0x3810101820281828,  
            0x0B6F6FD5DE64D564, 0x73F0F088FB838883, 0xFB4A4A6F94B16FB1, 0xCA5C5C72B8967296,  
            0x54383824706C246C, 0x5F5757F1AE08F108, 0x217373C7E652C752, 0x6497975135F351F3,  
            0xAECBCB238D652365, 0x25A1A17C59847C84, 0x57E8E89CCBBF9CBF, 0x5D3E3E217C632163,  
            0xEA9696DD377CDD7C, 0x1E6161DCC27FDC7F, 0x9C0D0D861A918691, 0x9B0F0F851E948594,  
            0x4BE0E090DBAB90AB, 0xBA7C7C42F8C642C6, 0x267171C4E257C457, 0x29CCCCAA83E5AAE5,  
            0xE39090D83B73D873, 0x090606050C0F050F, 0xF4F7F701F5030103, 0x2A1C1C1238361236,  
            0x3CC2C2A39FFEA3FE, 0x8B6A6A5FD4E15FE1, 0xBEAEAEF94710F910, 0x026969D0D26BD06B,  
            0xBF1717912EA891A8, 0x7199995829E858E8, 0x533A3A2774692769, 0xF72727B94ED0B9D0,  
            0x91D9D938A9483848, 0xDEEBEB13CD351335, 0xE52B2BB356CEB3CE, 0x7722223344553355,  
            0x04D2D2BBBFD6BBD6, 0x39A9A97049907090, 0x870707890E808980, 0xC13333A766F2A7F2,  
            0xEC2D2DB65AC1B6C1, 0x5A3C3C2278662266, 0xB81515922AAD92AD, 0xA9C9C92089602060,  
            0x5C87874915DB49DB, 0xB0AAAAFF4F1AFF1A, 0xD8505078A0887888, 0x2BA5A57A518E7A8E,  
            0x8903038F068A8F8A, 0x4A5959F8B213F813, 0x92090980129B809B, 0x231A1A1734391739,  
            0x106565DACA75DA75, 0x84D7D731B5533153, 0xD58484C61351C651, 0x03D0D0B8BBD3B8D3,  
            0xDC8282C31F5EC35E, 0xE22929B052CBB0CB, 0xC35A5A77B4997799, 0x2D1E1E113C331133,  
            0x3D7B7BCBF646CB46, 0xB7A8A8FC4B1FFC1F, 0x0C6D6DD6DA61D661, 0x622C2C3A584E3A4E,  

            0xC6C6A597F4A5F432, 0xF8F884EB9784976F, 0xEEEE99C7B099B05E, 0xF6F68DF78C8D8C7A,  
            0xFFFF0DE5170D17E8, 0xD6D6BDB7DCBDDC0A, 0xDEDEB1A7C8B1C816, 0x91915439FC54FC6D,  
            0x606050C0F050F090, 0x0202030405030507, 0xCECEA987E0A9E02E, 0x56567DAC877D87D1,  
            0xE7E719D52B192BCC, 0xB5B56271A662A613, 0x4D4DE69A31E6317C, 0xECEC9AC3B59AB559,  
            0x8F8F4505CF45CF40, 0x1F1F9D3EBC9DBCA3, 0x89894009C040C049, 0xFAFA87EF92879268,  
            0xEFEF15C53F153FD0, 0xB2B2EB7F26EB2694, 0x8E8EC90740C940CE, 0xFBFB0BED1D0B1DE6,  
            0x4141EC822FEC2F6E, 0xB3B3677DA967A91A, 0x5F5FFDBE1CFD1C43, 0x4545EA8A25EA2560,  
            0x2323BF46DABFDAF9, 0x5353F7A602F70251, 0xE4E496D3A196A145, 0x9B9B5B2DED5BED76,  
            0x7575C2EA5DC25D28, 0xE1E11CD9241C24C5, 0x3D3DAE7AE9AEE9D4, 0x4C4C6A98BE6ABEF2,  
            0x6C6C5AD8EE5AEE82, 0x7E7E41FCC341C3BD, 0xF5F502F1060206F3, 0x83834F1DD14FD152,  
            0x68685CD0E45CE48C, 0x5151F4A207F40756, 0xD1D134B95C345C8D, 0xF9F908E9180818E1,  
            0xE2E293DFAE93AE4C, 0xABAB734D9573953E, 0x626253C4F553F597, 0x2A2A3F54413F416B,  
            0x08080C10140C141C, 0x95955231F652F663, 0x4646658CAF65AFE9, 0x9D9D5E21E25EE27F,  
            0x3030286078287848, 0x3737A16EF8A1F8CF, 0x0A0A0F14110F111B, 0x2F2FB55EC4B5C4EB,  
            0x0E0E091C1B091B15, 0x242436485A365A7E, 0x1B1B9B36B69BB6AD, 0xDFDF3DA5473D4798,  
            0xCDCD26816A266AA7, 0x4E4E699CBB69BBF5, 0x7F7FCDFE4CCD4C33, 0xEAEA9FCFBA9FBA50,  
            0x12121B242D1B2D3F, 0x1D1D9E3AB99EB9A4, 0x585874B09C749CC4, 0x34342E68722E7246,  
            0x36362D6C772D7741, 0xDCDCB2A3CDB2CD11, 0xB4B4EE7329EE299D, 0x5B5BFBB616FB164D,  
            0xA4A4F65301F601A5, 0x76764DECD74DD7A1, 0xB7B76175A361A314, 0x7D7DCEFA49CE4934,  
            0x52527BA48D7B8DDF, 0xDDDD3EA1423E429F, 0x5E5E71BC937193CD, 0x13139726A297A2B1,  
            0xA6A6F55704F504A2, 0xB9B96869B868B801, 0x0000000000000000, 0xC1C12C99742C74B5,  
            0x40406080A060A0E0, 0xE3E31FDD211F21C2, 0x7979C8F243C8433A, 0xB6B6ED772CED2C9A,  
            0xD4D4BEB3D9BED90D, 0x8D8D4601CA46CA47, 0x6767D9CE70D97017, 0x72724BE4DD4BDDAF,  
            0x9494DE3379DE79ED, 0x9898D42B67D467FF, 0xB0B0E87B23E82393, 0x85854A11DE4ADE5B,  
            0xBBBB6B6DBD6BBD06, 0xC5C52A917E2A7EBB, 0x4F4FE59E34E5347B, 0xEDED16C13A163AD7,  
            0x8686C51754C554D2, 0x9A9AD72F62D762F8, 0x666655CCFF55FF99, 0x11119422A794A7B6,  
            0x8A8ACF0F4ACF4AC0, 0xE9E910C9301030D9, 0x040406080A060A0E, 0xFEFE81E798819866,  
            0xA0A0F05B0BF00BAB, 0x787844F0CC44CCB4, 0x2525BA4AD5BAD5F0, 0x4B4BE3963EE33E75,  
            0xA2A2F35F0EF30EAC, 0x5D5DFEBA19FE1944, 0x8080C01B5BC05BDB, 0x05058A0A858A8580,  
            0x3F3FAD7EECADECD3, 0x2121BC42DFBCDFFE, 0x707048E0D848D8A8, 0xF1F104F90C040CFD,  
            0x6363DFC67ADF7A19, 0x7777C1EE58C1582F, 0xAFAF75459F759F30, 0x42426384A563A5E7,  
            0x2020304050305070, 0xE5E51AD12E1A2ECB, 0xFDFD0EE1120E12EF, 0xBFBF6D65B76DB708,  
            0x81814C19D44CD455, 0x181814303C143C24, 0x2626354C5F355F79, 0xC3C32F9D712F71B2,  
            0xBEBEE16738E13886, 0x3535A26AFDA2FDC8, 0x8888CC0B4FCC4FC7, 0x2E2E395C4B394B65,  
            0x9393573DF957F96A, 0x5555F2AA0DF20D58, 0xFCFC82E39D829D61, 0x7A7A47F4C947C9B3,  
            0xC8C8AC8BEFACEF27, 0xBABAE76F32E73288, 0x32322B647D2B7D4F, 0xE6E695D7A495A442,  
            0xC0C0A09BFBA0FB3B, 0x19199832B398B3AA, 0x9E9ED12768D168F6, 0xA3A37F5D817F8122,  
            0x44446688AA66AAEE, 0x54547EA8827E82D6, 0x3B3BAB76E6ABE6DD, 0x0B0B83169E839E95,  
            0x8C8CCA0345CA45C9, 0xC7C729957B297BBC, 0x6B6BD3D66ED36E05, 0x28283C50443C446C,  
            0xA7A779558B798B2C, 0xBCBCE2633DE23D81, 0x16161D2C271D2731, 0xADAD76419A769A37,  
            0xDBDB3BAD4D3B4D96, 0x646456C8FA56FA9E, 0x74744EE8D24ED2A6, 0x14141E28221E2236,  
            0x9292DB3F76DB76E4, 0x0C0C0A181E0A1E12, 0x48486C90B46CB4FC, 0xB8B8E46B37E4378F,  
            0x9F9F5D25E75DE778, 0xBDBD6E61B26EB20F, 0x4343EF862AEF2A69, 0xC4C4A693F1A6F135,  
            0x3939A872E3A8E3DA, 0x3131A462F7A4F7C6, 0xD3D337BD5937598A, 0xF2F28BFF868B8674,  
            0xD5D532B156325683, 0x8B8B430DC543C54E, 0x6E6E59DCEB59EB85, 0xDADAB7AFC2B7C218,  
            0x01018C028F8C8F8E, 0xB1B16479AC64AC1D, 0x9C9CD2236DD26DF1, 0x4949E0923BE03B72,  
            0xD8D8B4ABC7B4C71F, 0xACACFA4315FA15B9, 0xF3F307FD090709FA, 0xCFCF25856F256FA0,  
            0xCACAAF8FEAAFEA20, 0xF4F48EF3898E897D, 0x4747E98E20E92067, 0x1010182028182838,  
            0x6F6FD5DE64D5640B, 0xF0F088FB83888373, 0x4A4A6F94B16FB1FB, 0x5C5C72B8967296CA,  
            0x383824706C246C54, 0x5757F1AE08F1085F, 0x7373C7E652C75221, 0x97975135F351F364,  
            0xCBCB238D652365AE, 0xA1A17C59847C8425, 0xE8E89CCBBF9CBF57, 0x3E3E217C6321635D,  
            0x9696DD377CDD7CEA, 0x6161DCC27FDC7F1E, 0x0D0D861A9186919C, 0x0F0F851E9485949B,  
            0xE0E090DBAB90AB4B, 0x7C7C42F8C642C6BA, 0x7171C4E257C45726, 0xCCCCAA83E5AAE529,  
            0x9090D83B73D873E3, 0x0606050C0F050F09, 0xF7F701F5030103F4, 0x1C1C12383612362A,  
            0xC2C2A39FFEA3FE3C, 0x6A6A5FD4E15FE18B, 0xAEAEF94710F910BE, 0x6969D0D26BD06B02,  
            0x1717912EA891A8BF, 0x99995829E858E871, 0x3A3A277469276953, 0x2727B94ED0B9D0F7,  
            0xD9D938A948384891, 0xEBEB13CD351335DE, 0x2B2BB356CEB3CEE5, 0x2222334455335577,  
            0xD2D2BBBFD6BBD604, 0xA9A9704990709039, 0x0707890E80898087, 0x3333A766F2A7F2C1,  
            0x2D2DB65AC1B6C1EC, 0x3C3C22786622665A, 0x1515922AAD92ADB8, 0xC9C92089602060A9,  
            0x87874915DB49DB5C, 0xAAAAFF4F1AFF1AB0, 0x505078A0887888D8, 0xA5A57A518E7A8E2B,  
            0x03038F068A8F8A89, 0x5959F8B213F8134A, 0x090980129B809B92, 0x1A1A173439173923,  
            0x6565DACA75DA7510, 0xD7D731B553315384, 0x8484C61351C651D5, 0xD0D0B8BBD3B8D303,  
            0x8282C31F5EC35EDC, 0x2929B052CBB0CBE2, 0x5A5A77B4997799C3, 0x1E1E113C3311332D,  
            0x7B7BCBF646CB463D, 0xA8A8FC4B1FFC1FB7, 0x6D6DD6DA61D6610C, 0x2C2C3A584E3A4E62 
        };
        #endregion

        /// <summary>
        /// Hash state as 64-bit words. The constructor allocates <c>BlockSize / 8</c> entries,
        /// <see cref="Initialize"/> resets them, and <see cref="GetResult"/> reads the digest out of them.
        /// </summary>
        protected readonly ulong[] m_state;

        /// <summary>
        /// Allocates the state array and puts the instance into its initial state.
        /// </summary>
        /// <param name="a_hash_size">Digest size; its integer value is forwarded to the base constructor.</param>
        /// <param name="a_block_size">Block size, forwarded to the base constructor.</param>
        public GroestlBase(HashSize a_hash_size, int a_block_size)
            : base((int)a_hash_size, a_block_size)
        {
            // One 64-bit state word per 8 bytes of BlockSize.
            int stateWords = BlockSize / 8;
            m_state = new ulong[stateWords];

            // The array has to exist first: Initialize() clears it and writes its last word.
            Initialize();
        }

        /// <summary>
        /// Feeds the final padding through <c>TransformBytes</c> and then runs
        /// <see cref="OutputTransformation"/>.
        /// </summary>
        /// <remarks>
        /// The padding is a 0x80 byte, zero bytes, and an 8-byte block counter written with
        /// <c>Converters.ConvertULongToBytesSwapOrder</c>. Its length is chosen so that the
        /// currently buffered bytes plus the padding fill one or two whole blocks.
        /// </remarks>
        protected override void Finish()
        {
            int buffered = m_buffer.Pos;

            // Whole blocks according to m_processed_bytes, plus one for the padding block.
            ulong blockCount = (m_processed_bytes / (uint)BlockSize) + 1;

            // Position of the 8-byte counter inside the padding array.
            int counterOffset = BlockSize - buffered - 8;

            if (counterOffset < 1)
            {
                // The 0x80 marker and the counter do not both fit behind the buffered
                // bytes, so the padding extends over one additional block.
                counterOffset += BlockSize;
                blockCount++;
            }

            byte[] padding = new byte[counterOffset + 8];
            padding[0] = 0x80;
            Converters.ConvertULongToBytesSwapOrder(blockCount, padding, counterOffset);

            TransformBytes(padding, 0, padding.Length);

            OutputTransformation();
        }

        /// <summary>
        /// Builds the digest from the current state.
        /// </summary>
        /// <returns>
        /// A new array of <c>HashSize</c> bytes, copied from offset <c>BlockSize - HashSize</c>
        /// of the byte form of <see cref="m_state"/>.
        /// </returns>
        protected override byte[] GetResult()
        {
            byte[] stateBytes = Converters.ConvertULongsToBytes(m_state);

            int digestLength = HashSize;
            int digestStart = BlockSize - digestLength;

            byte[] digest = new byte[digestLength];
            Buffer.BlockCopy(stateBytes, digestStart, digest, 0, digestLength);

            // Wipe the temporary byte copy of the state before it goes out of scope.
            Array.Clear(stateBytes, 0, stateBytes.Length);

            return digest;
        }

        /// <summary>
        /// Final step supplied by derived classes. <see cref="Finish"/> calls it once, after the
        /// padding has been passed to <c>TransformBytes</c>.
        /// </summary>
        protected abstract void OutputTransformation();

        /// <summary>
        /// Resets the state: every word is zeroed, then the last word is set to a value derived
        /// from the digest length in bits (<c>HashSize * 8</c>), and finally the base class is
        /// re-initialized.
        /// </summary>
        public override void Initialize()
        {
            Array.Clear(m_state, 0, m_state.Length);

            ulong digestBits = (ulong)(HashSize * 8);

            // Bits 0-7 of the bit length are moved to bits 56-63 of the word.
            ulong lowByteOnTop = digestBits << 56;

            // Bits 8-15 of the bit length are moved to bits 48-55 of the word.
            ulong highByteBelow = (digestBits & 0x0000FF00) << 40;

            int lastWord = (BlockSize / 8) - 1;
            m_state[lastWord] = lowByteOnTop | highByteBelow;

            base.Initialize();
        }
    };

    internal abstract class Groestl256Base : GroestlBase
    {
        /// <summary>
        /// Creates a Groestl variant that passes a fixed block size of 64 to the
        /// <see cref="GroestlBase"/> constructor.
        /// </summary>
        /// <param name="a_hash_size">Digest size, forwarded unchanged to the base constructor.</param>
        public Groestl256Base(HashSize a_hash_size)
            : base(a_hash_size, 64)
        {
        }

        protected override void TransformBlock(byte[] a_data, int a_index)
        {
            ulong[] m = Converters.ConvertBytesToULongs(a_data, a_index, BlockSize);

            ulong y0, y1, y2, y3, y4, y5, y6, y7;
            ulong z0, z1, z2, z3, z4, z5, z6, z7;
            ulong s0, s1, s2, s3, s4, s5, s6, s7;
            ulong w0, w1, w2, w3, w4, w5, w6, w7;

            w0 = m_state[0] ^ m[0];
            w1 = m_state[1] ^ m[1];
            w2 = m_state[2] ^ m[2];
            w3 = m_state[3] ^ m[3];
            w4 = m_state[4] ^ m[4];
            w5 = m_state[5] ^ m[5];
            w6 = m_state[6] ^ m[6];
            w7 = m_state[7] ^ m[7];

            z0 = (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ m[0];
            z1 = (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ m[1];
            z2 = (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ m[2];
            z3 = (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ m[3];
            z4 = (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ m[4];
            z5 = (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ m[5];
            z6 = (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ m[6];
            z7 = (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ m[7];

            y0 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];

            y0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000001 << 8) | ((ulong)0x0000000000000001 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000001 << 24) | ((ulong)0x0000000000000001 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000001 << 40) | ((ulong)0x0000000000000001 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000001 << 56) | ((ulong)0x0000000000000001 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000001 << 8) |
                ((ulong)0x0000000000000001 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000001 << 24) |
                ((ulong)0x0000000000000001 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000001 << 40) |
                ((ulong)0x0000000000000001 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000001 << 56) |
                ((ulong)0x0000000000000001 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000001 << 8) |
                ((ulong)0x0000000000000001 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000001 << 24) |
                ((ulong)0x0000000000000001 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000001 << 40) |
                ((ulong)0x0000000000000001 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000001 << 56) |
                ((ulong)0x0000000000000001 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000001 << 8) |
                ((ulong)0x0000000000000001 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000001 << 24) |
                ((ulong)0x0000000000000001 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000001 << 40) |
                ((ulong)0x0000000000000001 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000001 << 56) |
                ((ulong)0x0000000000000001 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000001 << 8) |
                ((ulong)0x0000000000000001 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000001 << 24) |
                ((ulong)0x0000000000000001 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000001 << 40) |
                ((ulong)0x0000000000000001 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000001 << 56) |
                ((ulong)0x0000000000000001 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000001 << 8) |
                ((ulong)0x0000000000000001 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000001 << 24) |
                ((ulong)0x0000000000000001 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000001 << 40) |
                ((ulong)0x0000000000000001 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000001 << 56) |
                ((ulong)0x0000000000000001 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000001 << 8) |
                ((ulong)0x0000000000000001 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000001 << 24) |
                ((ulong)0x0000000000000001 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000001 << 40) |
                ((ulong)0x0000000000000001 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000001 << 56) |
                ((ulong)0x0000000000000001 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000001 << 8) |
                ((ulong)0x0000000000000001 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000001 << 24) |
                ((ulong)0x0000000000000001 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000001 << 40) |
                ((ulong)0x0000000000000001 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000001 << 56) |
                ((ulong)0x0000000000000001 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];

            z0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000002 << 8) |
                ((ulong)0x0000000000000002 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000002 << 24) |
                ((ulong)0x0000000000000002 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000002 << 40) |
                ((ulong)0x0000000000000002 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000002 << 56) |
                ((ulong)0x0000000000000002 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000002 << 8) |
                ((ulong)0x0000000000000002 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000002 << 24) |
                ((ulong)0x0000000000000002 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000002 << 40) |
                ((ulong)0x0000000000000002 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000002 << 56) |
                ((ulong)0x0000000000000002 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000002 << 8) |
                ((ulong)0x0000000000000002 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000002 << 24) |
                ((ulong)0x0000000000000002 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000002 << 40) |
                ((ulong)0x0000000000000002 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000002 << 56) |
                ((ulong)0x0000000000000002 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000002 << 8) |
                ((ulong)0x0000000000000002 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000002 << 24) |
                ((ulong)0x0000000000000002 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000002 << 40) |
                ((ulong)0x0000000000000002 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000002 << 56) |
                ((ulong)0x0000000000000002 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000002 << 8) |
                ((ulong)0x0000000000000002 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000002 << 24) |
                ((ulong)0x0000000000000002 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000002 << 40) |
                ((ulong)0x0000000000000002 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000002 << 56) |
                ((ulong)0x0000000000000002 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000002 << 8) |
                ((ulong)0x0000000000000002 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000002 << 24) |
                ((ulong)0x0000000000000002 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000002 << 40) |
                ((ulong)0x0000000000000002 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000002 << 56) |
                ((ulong)0x0000000000000002 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000002 << 8) |
                ((ulong)0x0000000000000002 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000002 << 24) |
                ((ulong)0x0000000000000002 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000002 << 40) |
                ((ulong)0x0000000000000002 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000002 << 56) |
                ((ulong)0x0000000000000002 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000002 << 8) |
                ((ulong)0x0000000000000002 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000002 << 24) |
                ((ulong)0x0000000000000002 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000002 << 40) |
                ((ulong)0x0000000000000002 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000002 << 56) |
                ((ulong)0x0000000000000002 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];

            y0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000003 << 8) |
                ((ulong)0x0000000000000003 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000003 << 24) |
                ((ulong)0x0000000000000003 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000003 << 40) |
                ((ulong)0x0000000000000003 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000003 << 56) |
                ((ulong)0x0000000000000003 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000003 << 8) |
                ((ulong)0x0000000000000003 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000003 << 24) |
                ((ulong)0x0000000000000003 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000003 << 40) |
                ((ulong)0x0000000000000003 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000003 << 56) |
                ((ulong)0x0000000000000003 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000003 << 8) |
                ((ulong)0x0000000000000003 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000003 << 24) |
                ((ulong)0x0000000000000003 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000003 << 40) |
                ((ulong)0x0000000000000003 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000003 << 56) |
                ((ulong)0x0000000000000003 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000003 << 8) |
                ((ulong)0x0000000000000003 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000003 << 24) |
                ((ulong)0x0000000000000003 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000003 << 40) |
                ((ulong)0x0000000000000003 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000003 << 56) |
                ((ulong)0x0000000000000003 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000003 << 8) |
                ((ulong)0x0000000000000003 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000003 << 24) |
                ((ulong)0x0000000000000003 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000003 << 40) |
                ((ulong)0x0000000000000003 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000003 << 56) |
                ((ulong)0x0000000000000003 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000003 << 8) |
                ((ulong)0x0000000000000003 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000003 << 24) |
                ((ulong)0x0000000000000003 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000003 << 40) |
                ((ulong)0x0000000000000003 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000003 << 56) |
                ((ulong)0x0000000000000003 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000003 << 8) |
                ((ulong)0x0000000000000003 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000003 << 24) |
                ((ulong)0x0000000000000003 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000003 << 40) |
                ((ulong)0x0000000000000003 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000003 << 56) |
                ((ulong)0x0000000000000003 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000003 << 8) |
                ((ulong)0x0000000000000003 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000003 << 24) |
                ((ulong)0x0000000000000003 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000003 << 40) |
                ((ulong)0x0000000000000003 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000003 << 56) |
                ((ulong)0x0000000000000003 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];

            z0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000004 << 8) |
                ((ulong)0x0000000000000004 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000004 << 24) |
                ((ulong)0x0000000000000004 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000004 << 40) |
                ((ulong)0x0000000000000004 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000004 << 56) |
                ((ulong)0x0000000000000004 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000004 << 8) |
                ((ulong)0x0000000000000004 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000004 << 24) |
                ((ulong)0x0000000000000004 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000004 << 40) |
                ((ulong)0x0000000000000004 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000004 << 56) |
                ((ulong)0x0000000000000004 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000004 << 8) |
                ((ulong)0x0000000000000004 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000004 << 24) |
                ((ulong)0x0000000000000004 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000004 << 40) |
                ((ulong)0x0000000000000004 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000004 << 56) |
                ((ulong)0x0000000000000004 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000004 << 8) |
                ((ulong)0x0000000000000004 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000004 << 24) |
                ((ulong)0x0000000000000004 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000004 << 40) |
                ((ulong)0x0000000000000004 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000004 << 56) |
                ((ulong)0x0000000000000004 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000004 << 8) |
                ((ulong)0x0000000000000004 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000004 << 24) |
                ((ulong)0x0000000000000004 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000004 << 40) |
                ((ulong)0x0000000000000004 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000004 << 56) |
                ((ulong)0x0000000000000004 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000004 << 8) |
                ((ulong)0x0000000000000004 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000004 << 24) |
                ((ulong)0x0000000000000004 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000004 << 40) |
                ((ulong)0x0000000000000004 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000004 << 56) |
                ((ulong)0x0000000000000004 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000004 << 8) |
                ((ulong)0x0000000000000004 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000004 << 24) |
                ((ulong)0x0000000000000004 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000004 << 40) |
                ((ulong)0x0000000000000004 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000004 << 56) |
                ((ulong)0x0000000000000004 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000004 << 8) |
                ((ulong)0x0000000000000004 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000004 << 24) |
                ((ulong)0x0000000000000004 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000004 << 40) |
                ((ulong)0x0000000000000004 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000004 << 56) |
                ((ulong)0x0000000000000004 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];

            y0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000005 << 8) |
                ((ulong)0x0000000000000005 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000005 << 24) |
                ((ulong)0x0000000000000005 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000005 << 40) |
                ((ulong)0x0000000000000005 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000005 << 56) |
                ((ulong)0x0000000000000005 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000005 << 8) |
                ((ulong)0x0000000000000005 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000005 << 24) |
                ((ulong)0x0000000000000005 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000005 << 40) |
                ((ulong)0x0000000000000005 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000005 << 56) |
                ((ulong)0x0000000000000005 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000005 << 8) |
                ((ulong)0x0000000000000005 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000005 << 24) |
                ((ulong)0x0000000000000005 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000005 << 40) |
                ((ulong)0x0000000000000005 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000005 << 56) |
                ((ulong)0x0000000000000005 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000005 << 8) |
                ((ulong)0x0000000000000005 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000005 << 24) |
                ((ulong)0x0000000000000005 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000005 << 40) |
                ((ulong)0x0000000000000005 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000005 << 56) |
                ((ulong)0x0000000000000005 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000005 << 8) |
                ((ulong)0x0000000000000005 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000005 << 24) |
                ((ulong)0x0000000000000005 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000005 << 40) |
                ((ulong)0x0000000000000005 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000005 << 56) |
                ((ulong)0x0000000000000005 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000005 << 8) |
                ((ulong)0x0000000000000005 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000005 << 24) |
                ((ulong)0x0000000000000005 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000005 << 40) |
                ((ulong)0x0000000000000005 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000005 << 56) |
                ((ulong)0x0000000000000005 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000005 << 8) |
                ((ulong)0x0000000000000005 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000005 << 24) |
                ((ulong)0x0000000000000005 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000005 << 40) |
                ((ulong)0x0000000000000005 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000005 << 56) |
                ((ulong)0x0000000000000005 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000005 << 8) |
                ((ulong)0x0000000000000005 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000005 << 24) |
                ((ulong)0x0000000000000005 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000005 << 40) |
                ((ulong)0x0000000000000005 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000005 << 56) |
                ((ulong)0x0000000000000005 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];

            z0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000006 << 8) |
                ((ulong)0x0000000000000006 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000006 << 24) |
                ((ulong)0x0000000000000006 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000006 << 40) |
                ((ulong)0x0000000000000006 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000006 << 56) |
                ((ulong)0x0000000000000006 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000006 << 8) |
                ((ulong)0x0000000000000006 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000006 << 24) |
                ((ulong)0x0000000000000006 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000006 << 40) |
                ((ulong)0x0000000000000006 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000006 << 56) |
                ((ulong)0x0000000000000006 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000006 << 8) |
                ((ulong)0x0000000000000006 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000006 << 24) |
                ((ulong)0x0000000000000006 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000006 << 40) |
                ((ulong)0x0000000000000006 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000006 << 56) |
                ((ulong)0x0000000000000006 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000006 << 8) |
                ((ulong)0x0000000000000006 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000006 << 24) |
                ((ulong)0x0000000000000006 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000006 << 40) |
                ((ulong)0x0000000000000006 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000006 << 56) |
                ((ulong)0x0000000000000006 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000006 << 8) |
                ((ulong)0x0000000000000006 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000006 << 24) |
                ((ulong)0x0000000000000006 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000006 << 40) |
                ((ulong)0x0000000000000006 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000006 << 56) |
                ((ulong)0x0000000000000006 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000006 << 8) |
                ((ulong)0x0000000000000006 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000006 << 24) |
                ((ulong)0x0000000000000006 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000006 << 40) |
                ((ulong)0x0000000000000006 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000006 << 56) |
                ((ulong)0x0000000000000006 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000006 << 8) |
                ((ulong)0x0000000000000006 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000006 << 24) |
                ((ulong)0x0000000000000006 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000006 << 40) |
                ((ulong)0x0000000000000006 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000006 << 56) |
                ((ulong)0x0000000000000006 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000006 << 8) |
                ((ulong)0x0000000000000006 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000006 << 24) |
                ((ulong)0x0000000000000006 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000006 << 40) |
                ((ulong)0x0000000000000006 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000006 << 56) |
                ((ulong)0x0000000000000006 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];

            y0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000007 << 8) |
                ((ulong)0x0000000000000007 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000007 << 24) |
                ((ulong)0x0000000000000007 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000007 << 40) |
                ((ulong)0x0000000000000007 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000007 << 56) |
                ((ulong)0x0000000000000007 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000007 << 8) |
                ((ulong)0x0000000000000007 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000007 << 24) |
                ((ulong)0x0000000000000007 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000007 << 40) |
                ((ulong)0x0000000000000007 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000007 << 56) |
                ((ulong)0x0000000000000007 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000007 << 8) |
                ((ulong)0x0000000000000007 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000007 << 24) |
                ((ulong)0x0000000000000007 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000007 << 40) |
                ((ulong)0x0000000000000007 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000007 << 56) |
                ((ulong)0x0000000000000007 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000007 << 8) |
                ((ulong)0x0000000000000007 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000007 << 24) |
                ((ulong)0x0000000000000007 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000007 << 40) |
                ((ulong)0x0000000000000007 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000007 << 56) |
                ((ulong)0x0000000000000007 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000007 << 8) |
                ((ulong)0x0000000000000007 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000007 << 24) |
                ((ulong)0x0000000000000007 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000007 << 40) |
                ((ulong)0x0000000000000007 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000007 << 56) |
                ((ulong)0x0000000000000007 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000007 << 8) |
                ((ulong)0x0000000000000007 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000007 << 24) |
                ((ulong)0x0000000000000007 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000007 << 40) |
                ((ulong)0x0000000000000007 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000007 << 56) |
                ((ulong)0x0000000000000007 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000007 << 8) |
                ((ulong)0x0000000000000007 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000007 << 24) |
                ((ulong)0x0000000000000007 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000007 << 40) |
                ((ulong)0x0000000000000007 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000007 << 56) |
                ((ulong)0x0000000000000007 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000007 << 8) |
                ((ulong)0x0000000000000007 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000007 << 24) |
                ((ulong)0x0000000000000007 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000007 << 40) |
                ((ulong)0x0000000000000007 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000007 << 56) |
                ((ulong)0x0000000000000007 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^ s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^ s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^ s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];

            z0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000008 << 8) |
                ((ulong)0x0000000000000008 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000008 << 24) |
                ((ulong)0x0000000000000008 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000008 << 40) |
                ((ulong)0x0000000000000008 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000008 << 56) |
                ((ulong)0x0000000000000008 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000008 << 8) |
                ((ulong)0x0000000000000008 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000008 << 24) |
                ((ulong)0x0000000000000008 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000008 << 40) |
                ((ulong)0x0000000000000008 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000008 << 56) |
                ((ulong)0x0000000000000008 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000008 << 8) |
                ((ulong)0x0000000000000008 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000008 << 24) |
                ((ulong)0x0000000000000008 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000008 << 40) |
                ((ulong)0x0000000000000008 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000008 << 56) |
                ((ulong)0x0000000000000008 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000008 << 8) |
                ((ulong)0x0000000000000008 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000008 << 24) |
                ((ulong)0x0000000000000008 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000008 << 40) |
                ((ulong)0x0000000000000008 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000008 << 56) |
                ((ulong)0x0000000000000008 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000008 << 8) |
                ((ulong)0x0000000000000008 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000008 << 24) |
                ((ulong)0x0000000000000008 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000008 << 40) |
                ((ulong)0x0000000000000008 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000008 << 56) |
                ((ulong)0x0000000000000008 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000008 << 8) |
                ((ulong)0x0000000000000008 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000008 << 24) |
                ((ulong)0x0000000000000008 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000008 << 40) |
                ((ulong)0x0000000000000008 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000008 << 56) |
                ((ulong)0x0000000000000008 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000008 << 8) |
                ((ulong)0x0000000000000008 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000008 << 24) |
                ((ulong)0x0000000000000008 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000008 << 40) |
                ((ulong)0x0000000000000008 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000008 << 56) |
                ((ulong)0x0000000000000008 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000008 << 8) |
                ((ulong)0x0000000000000008 >> (64 - 8))) & (0x000000FF000000FF)) | ((((ulong)0x0000000000000008 << 24) |
                ((ulong)0x0000000000000008 >> (64 - 24))) & (0x0000FF000000FF00)) | ((((ulong)0x0000000000000008 << 40) |
                ((ulong)0x0000000000000008 >> (64 - 40))) & (0x00FF000000FF0000)) | ((((ulong)0x0000000000000008 << 56) |
                ((ulong)0x0000000000000008 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];

            y0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000009 << 8) | ((ulong)0x0000000000000009 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000009 << 24) | ((ulong)0x0000000000000009 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000009 << 40) | ((ulong)0x0000000000000009 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000009 << 56) | ((ulong)0x0000000000000009 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000009 << 8) | ((ulong)0x0000000000000009 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000009 << 24) | ((ulong)0x0000000000000009 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000009 << 40) | ((ulong)0x0000000000000009 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000009 << 56) | ((ulong)0x0000000000000009 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000009 << 8) | ((ulong)0x0000000000000009 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000009 << 24) | ((ulong)0x0000000000000009 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000009 << 40) | ((ulong)0x0000000000000009 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000009 << 56) | ((ulong)0x0000000000000009 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000009 << 8) | ((ulong)0x0000000000000009 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000009 << 24) | ((ulong)0x0000000000000009 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000009 << 40) | ((ulong)0x0000000000000009 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000009 << 56) | ((ulong)0x0000000000000009 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000009 << 8) | ((ulong)0x0000000000000009 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000009 << 24) | ((ulong)0x0000000000000009 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000009 << 40) | ((ulong)0x0000000000000009 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000009 << 56) | ((ulong)0x0000000000000009 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000009 << 8) | ((ulong)0x0000000000000009 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000009 << 24) | ((ulong)0x0000000000000009 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000009 << 40) | ((ulong)0x0000000000000009 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000009 << 56) | ((ulong)0x0000000000000009 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000009 << 8) | ((ulong)0x0000000000000009 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000009 << 24) | ((ulong)0x0000000000000009 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000009 << 40) | ((ulong)0x0000000000000009 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000009 << 56) | ((ulong)0x0000000000000009 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000009 << 8) | ((ulong)0x0000000000000009 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000009 << 24) | ((ulong)0x0000000000000009 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000009 << 40) | ((ulong)0x0000000000000009 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000009 << 56) | ((ulong)0x0000000000000009 >> (64 - 56))) & (0xFF000000FF000000)));

            s0 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^
                s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];
            s1 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            s2 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            s3 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            s4 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            s5 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            s6 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            s7 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];

            w0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(w0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w2 >> (8 * 2))] ^
                s_T[3 * 256 + (byte)(w3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w7 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(w1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w0 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(w2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w1 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(w3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w2 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(w4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w3 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(w5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w4 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(w6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w5 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(w7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w6 >> (8 * 7))];

            z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^
                s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];

            y0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^
                s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];

            z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^
                s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];

            y0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^
                s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];

            z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^
                s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];

            y0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^
                s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];

            z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^
                s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];

            y0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^
                s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];

            z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^
                s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];

            m_state[0] ^= s0 ^ y0;
            m_state[1] ^= s1 ^ y1;
            m_state[2] ^= s2 ^ y2;
            m_state[3] ^= s3 ^ y3;
            m_state[4] ^= s4 ^ y4;
            m_state[5] ^= s5 ^ y5;
            m_state[6] ^= s6 ^ y6;
            m_state[7] ^= s7 ^ y7;
        }

        protected override void OutputTransformation()
        {
            ulong w0, w1, w2, w3, w4, w5, w6, w7;
            ulong y0, y1, y2, y3, y4, y5, y6, y7;
            ulong z0, z1, z2, z3, z4, z5, z6, z7;

            w0 = m_state[0];
            w1 = m_state[1];
            w2 = m_state[2];
            w3 = m_state[3];
            w4 = m_state[4];
            w5 = m_state[5];
            w6 = m_state[6];
            w7 = m_state[7];

            w0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            w7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(w0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w7 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(w1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w0 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(w2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w1 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(w3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w2 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(w4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w3 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(w5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w4 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(w6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w5 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(w7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w6 >> (8 * 7))];

            z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0100000000000000 << 8) | ((ulong)0x0100000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0100000000000000 << 24) | ((ulong)0x0100000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0100000000000000 << 40) | ((ulong)0x0100000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0100000000000000 << 56) | ((ulong)0x0100000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];

            y0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0200000000000000 << 8) | ((ulong)0x0200000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0200000000000000 << 24) | ((ulong)0x0200000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0200000000000000 << 40) | ((ulong)0x0200000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0200000000000000 << 56) | ((ulong)0x0200000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];

            z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0300000000000000 << 8) | ((ulong)0x0300000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0300000000000000 << 24) | ((ulong)0x0300000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0300000000000000 << 40) | ((ulong)0x0300000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0300000000000000 << 56) | ((ulong)0x0300000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];

            y0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0400000000000000 << 8) | ((ulong)0x0400000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0400000000000000 << 24) | ((ulong)0x0400000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0400000000000000 << 40) | ((ulong)0x0400000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0400000000000000 << 56) | ((ulong)0x0400000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];

            z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0500000000000000 << 8) | ((ulong)0x0500000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0500000000000000 << 24) | ((ulong)0x0500000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0500000000000000 << 40) | ((ulong)0x0500000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0500000000000000 << 56) | ((ulong)0x0500000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];

            y0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0600000000000000 << 8) | ((ulong)0x0600000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0600000000000000 << 24) | ((ulong)0x0600000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0600000000000000 << 40) | ((ulong)0x0600000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0600000000000000 << 56) | ((ulong)0x0600000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];

            z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0700000000000000 << 8) | ((ulong)0x0700000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0700000000000000 << 24) | ((ulong)0x0700000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0700000000000000 << 40) | ((ulong)0x0700000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0700000000000000 << 56) | ((ulong)0x0700000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];

            y0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0800000000000000 << 8) | ((ulong)0x0800000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0800000000000000 << 24) | ((ulong)0x0800000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0800000000000000 << 40) | ((ulong)0x0800000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0800000000000000 << 56) | ((ulong)0x0800000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            z0 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];

            z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)0x0900000000000000 << 8) | ((ulong)0x0900000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0900000000000000 << 24) | ((ulong)0x0900000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0900000000000000 << 40) | ((ulong)0x0900000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0900000000000000 << 56) | ((ulong)0x0900000000000000 >> (64 - 56))) & (0xFF000000FF000000)));

            w0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            w1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            w2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            w3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            w4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            w5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            w6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            w7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];

            m_state[0] ^= w0;
            m_state[1] ^= w1;
            m_state[2] ^= w2;
            m_state[3] ^= w3;
            m_state[4] ^= w4;
            m_state[5] ^= w5;
            m_state[6] ^= w6;
            m_state[7] ^= w7;
        }

    };

    internal abstract class Groestl512Base : GroestlBase
    {
        /// <summary>
        /// Forwards the requested digest size to <see cref="GroestlBase"/> together with the
        /// fixed value 128. The constructor performs no other work.
        /// </summary>
        /// <param name="a_hash_size">Digest size selected by the concrete subclass.</param>
        /// <remarks>
        /// NOTE(review): 128 is presumably the block size in bytes (TransformBlock in this class
        /// reads sixteen 64-bit words per block) - confirm against the GroestlBase constructor.
        /// </remarks>
        public Groestl512Base(HashSize a_hash_size) : base(a_hash_size, 128) { }

        protected override void TransformBlock(byte[] a_data, int a_index)
        {
            ulong[] m = Converters.ConvertBytesToULongs(a_data, a_index, BlockSize);

            ulong y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14, y15;
            ulong z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11, z12, z13, z14, z15;
            ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
            ulong w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;

            w0 = m_state[0] ^ m[0];
            w1 = m_state[1] ^ m[1];
            w2 = m_state[2] ^ m[2];
            w3 = m_state[3] ^ m[3];
            w4 = m_state[4] ^ m[4];
            w5 = m_state[5] ^ m[5];
            w6 = m_state[6] ^ m[6];
            w7 = m_state[7] ^ m[7];
            w8 = m_state[8] ^ m[8];
            w9 = m_state[9] ^ m[9];
            w10 = m_state[10] ^ m[10];
            w11 = m_state[11] ^ m[11];
            w12 = m_state[12] ^ m[12];
            w13 = m_state[13] ^ m[13];
            w14 = m_state[14] ^ m[14];
            w15 = m_state[15] ^ m[15];

            z0 = (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^ m[0];
            z1 = (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^ m[1];
            z2 = (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^ m[2];
            z3 = (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^ m[3];
            z4 = (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^ m[4];
            z5 = (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^ m[5];
            z6 = (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^ m[6];
            z7 = (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^ m[7];
            z8 = (((((ulong)0xffffffffffffff7f << 8) | ((ulong)0xffffffffffffff7f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff7f << 24) | ((ulong)0xffffffffffffff7f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff7f << 40) | ((ulong)0xffffffffffffff7f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff7f << 56) | ((ulong)0xffffffffffffff7f >> (64 - 56))) & (0xFF000000FF000000))) ^ m[8];
            z9 = (((((ulong)0xffffffffffffff6f << 8) | ((ulong)0xffffffffffffff6f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff6f << 24) | ((ulong)0xffffffffffffff6f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff6f << 40) | ((ulong)0xffffffffffffff6f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff6f << 56) | ((ulong)0xffffffffffffff6f >> (64 - 56))) & (0xFF000000FF000000))) ^ m[9];
            z10 = (((((ulong)0xffffffffffffff5f << 8) | ((ulong)0xffffffffffffff5f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff5f << 24) | ((ulong)0xffffffffffffff5f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff5f << 40) | ((ulong)0xffffffffffffff5f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff5f << 56) | ((ulong)0xffffffffffffff5f >> (64 - 56))) & (0xFF000000FF000000))) ^ m[10];
            z11 = (((((ulong)0xffffffffffffff4f << 8) | ((ulong)0xffffffffffffff4f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff4f << 24) | ((ulong)0xffffffffffffff4f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff4f << 40) | ((ulong)0xffffffffffffff4f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff4f << 56) | ((ulong)0xffffffffffffff4f >> (64 - 56))) & (0xFF000000FF000000))) ^ m[11];
            z12 = (((((ulong)0xffffffffffffff3f << 8) | ((ulong)0xffffffffffffff3f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff3f << 24) | ((ulong)0xffffffffffffff3f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff3f << 40) | ((ulong)0xffffffffffffff3f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff3f << 56) | ((ulong)0xffffffffffffff3f >> (64 - 56))) & (0xFF000000FF000000))) ^ m[12];
            z13 = (((((ulong)0xffffffffffffff2f << 8) | ((ulong)0xffffffffffffff2f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff2f << 24) | ((ulong)0xffffffffffffff2f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff2f << 40) | ((ulong)0xffffffffffffff2f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff2f << 56) | ((ulong)0xffffffffffffff2f >> (64 - 56))) & (0xFF000000FF000000))) ^ m[13];
            z14 = (((((ulong)0xffffffffffffff1f << 8) | ((ulong)0xffffffffffffff1f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff1f << 24) | ((ulong)0xffffffffffffff1f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff1f << 40) | ((ulong)0xffffffffffffff1f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff1f << 56) | ((ulong)0xffffffffffffff1f >> (64 - 56))) & (0xFF000000FF000000))) ^ m[14];
            z15 = (((((ulong)0xffffffffffffff0f << 8) | ((ulong)0xffffffffffffff0f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff0f << 24) | ((ulong)0xffffffffffffff0f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff0f << 40) | ((ulong)0xffffffffffffff0f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff0f << 56) | ((ulong)0xffffffffffffff0f >> (64 - 56))) & (0xFF000000FF000000))) ^ m[15];

            y15 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z10 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            y14 = s_T[0 * 256 + (byte)(z15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z9 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y13 = s_T[0 * 256 + (byte)(z14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z8 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z15 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y12 = s_T[0 * 256 + (byte)(z13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z15 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z14 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y11 = s_T[0 * 256 + (byte)(z12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z14 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z13 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y10 = s_T[0 * 256 + (byte)(z11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z13 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z12 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y9 = s_T[0 * 256 + (byte)(z10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z12 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z11 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z15 >> (8 * 7))];
            y8 = s_T[0 * 256 + (byte)(z9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z11 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z10 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z14 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z10 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z9 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z13 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z9 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z8 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z12 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z8 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z11 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z15 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z10 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z14 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z9 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z13 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z8 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z12 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y0 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z11 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];

            for (int i = 1; i < 14 - 1; i += 2)
            {
                y0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y8 ^= (((((ulong)0xffffffffffffff7f << 8) | ((ulong)0xffffffffffffff7f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff7f << 24) | ((ulong)0xffffffffffffff7f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff7f << 40) | ((ulong)0xffffffffffffff7f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff7f << 56) | ((ulong)0xffffffffffffff7f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y9 ^= (((((ulong)0xffffffffffffff6f << 8) | ((ulong)0xffffffffffffff6f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff6f << 24) | ((ulong)0xffffffffffffff6f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff6f << 40) | ((ulong)0xffffffffffffff6f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff6f << 56) | ((ulong)0xffffffffffffff6f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y10 ^= (((((ulong)0xffffffffffffff5f << 8) | ((ulong)0xffffffffffffff5f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff5f << 24) | ((ulong)0xffffffffffffff5f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff5f << 40) | ((ulong)0xffffffffffffff5f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff5f << 56) | ((ulong)0xffffffffffffff5f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y11 ^= (((((ulong)0xffffffffffffff4f << 8) | ((ulong)0xffffffffffffff4f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff4f << 24) | ((ulong)0xffffffffffffff4f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff4f << 40) | ((ulong)0xffffffffffffff4f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff4f << 56) | ((ulong)0xffffffffffffff4f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y12 ^= (((((ulong)0xffffffffffffff3f << 8) | ((ulong)0xffffffffffffff3f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff3f << 24) | ((ulong)0xffffffffffffff3f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff3f << 40) | ((ulong)0xffffffffffffff3f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff3f << 56) | ((ulong)0xffffffffffffff3f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y13 ^= (((((ulong)0xffffffffffffff2f << 8) | ((ulong)0xffffffffffffff2f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff2f << 24) | ((ulong)0xffffffffffffff2f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff2f << 40) | ((ulong)0xffffffffffffff2f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff2f << 56) | ((ulong)0xffffffffffffff2f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y14 ^= (((((ulong)0xffffffffffffff1f << 8) | ((ulong)0xffffffffffffff1f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff1f << 24) | ((ulong)0xffffffffffffff1f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff1f << 40) | ((ulong)0xffffffffffffff1f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff1f << 56) | ((ulong)0xffffffffffffff1f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));
                y15 ^= (((((ulong)0xffffffffffffff0f << 8) | ((ulong)0xffffffffffffff0f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff0f << 24) | ((ulong)0xffffffffffffff0f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff0f << 40) | ((ulong)0xffffffffffffff0f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff0f << 56) | ((ulong)0xffffffffffffff0f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 8) | ((ulong)i >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 24) | ((ulong)i >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 40) | ((ulong)i >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56) | ((ulong)i >> (64 - 56))) & (0xFF000000FF000000)));

                z15 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y10 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
                z14 = s_T[0 * 256 + (byte)(y15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y9 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
                z13 = s_T[0 * 256 + (byte)(y14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y8 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y15 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
                z12 = s_T[0 * 256 + (byte)(y13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y15 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y14 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
                z11 = s_T[0 * 256 + (byte)(y12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y14 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y13 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
                z10 = s_T[0 * 256 + (byte)(y11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y13 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y12 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
                z9 = s_T[0 * 256 + (byte)(y10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y12 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y11 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y15 >> (8 * 7))];
                z8 = s_T[0 * 256 + (byte)(y9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y11 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y10 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y14 >> (8 * 7))];
                z7 = s_T[0 * 256 + (byte)(y8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y10 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y9 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y13 >> (8 * 7))];
                z6 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y9 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y8 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y12 >> (8 * 7))];
                z5 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y8 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y11 >> (8 * 7))];
                z4 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y15 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y10 >> (8 * 7))];
                z3 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y14 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y9 >> (8 * 7))];
                z2 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y13 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y8 >> (8 * 7))];
                z1 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y12 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
                z0 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y11 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];

                z0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z8 ^= (((((ulong)0xffffffffffffff7f << 8) | ((ulong)0xffffffffffffff7f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff7f << 24) | ((ulong)0xffffffffffffff7f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff7f << 40) | ((ulong)0xffffffffffffff7f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff7f << 56) | ((ulong)0xffffffffffffff7f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z9 ^= (((((ulong)0xffffffffffffff6f << 8) | ((ulong)0xffffffffffffff6f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff6f << 24) | ((ulong)0xffffffffffffff6f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff6f << 40) | ((ulong)0xffffffffffffff6f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff6f << 56) | ((ulong)0xffffffffffffff6f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z10 ^= (((((ulong)0xffffffffffffff5f << 8) | ((ulong)0xffffffffffffff5f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff5f << 24) | ((ulong)0xffffffffffffff5f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff5f << 40) | ((ulong)0xffffffffffffff5f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff5f << 56) | ((ulong)0xffffffffffffff5f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z11 ^= (((((ulong)0xffffffffffffff4f << 8) | ((ulong)0xffffffffffffff4f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff4f << 24) | ((ulong)0xffffffffffffff4f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff4f << 40) | ((ulong)0xffffffffffffff4f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff4f << 56) | ((ulong)0xffffffffffffff4f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z12 ^= (((((ulong)0xffffffffffffff3f << 8) | ((ulong)0xffffffffffffff3f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff3f << 24) | ((ulong)0xffffffffffffff3f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff3f << 40) | ((ulong)0xffffffffffffff3f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff3f << 56) | ((ulong)0xffffffffffffff3f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z13 ^= (((((ulong)0xffffffffffffff2f << 8) | ((ulong)0xffffffffffffff2f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff2f << 24) | ((ulong)0xffffffffffffff2f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff2f << 40) | ((ulong)0xffffffffffffff2f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff2f << 56) | ((ulong)0xffffffffffffff2f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z14 ^= (((((ulong)0xffffffffffffff1f << 8) | ((ulong)0xffffffffffffff1f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff1f << 24) | ((ulong)0xffffffffffffff1f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff1f << 40) | ((ulong)0xffffffffffffff1f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff1f << 56) | ((ulong)0xffffffffffffff1f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));
                z15 ^= (((((ulong)0xffffffffffffff0f << 8) | ((ulong)0xffffffffffffff0f >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xffffffffffffff0f << 24) | ((ulong)0xffffffffffffff0f >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xffffffffffffff0f << 40) | ((ulong)0xffffffffffffff0f >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xffffffffffffff0f << 56) | ((ulong)0xffffffffffffff0f >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 8) | ((ulong)(i + 1) >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 24) | ((ulong)(i + 1) >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 40) | ((ulong)(i + 1) >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56) | ((ulong)(i + 1) >> (64 - 56))) & (0xFF000000FF000000)));

                y15 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z10 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
                y14 = s_T[0 * 256 + (byte)(z15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z9 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
                y13 = s_T[0 * 256 + (byte)(z14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z8 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z15 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
                y12 = s_T[0 * 256 + (byte)(z13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z15 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z14 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
                y11 = s_T[0 * 256 + (byte)(z12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z14 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z13 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
                y10 = s_T[0 * 256 + (byte)(z11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z13 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z12 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
                y9 = s_T[0 * 256 + (byte)(z10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z12 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z11 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z15 >> (8 * 7))];
                y8 = s_T[0 * 256 + (byte)(z9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z11 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z10 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z14 >> (8 * 7))];
                y7 = s_T[0 * 256 + (byte)(z8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z10 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z9 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z13 >> (8 * 7))];
                y6 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z9 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z8 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z12 >> (8 * 7))];
                y5 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z8 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z11 >> (8 * 7))];
                y4 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z15 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z10 >> (8 * 7))];
                y3 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z14 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z9 >> (8 * 7))];
                y2 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z13 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z8 >> (8 * 7))];
                y1 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z12 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
                y0 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z11 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];
            }

            y0 ^= (((((ulong)0xffffffffffffffff << 8) | ((ulong)0xffffffffffffffff >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffff << 24) | ((ulong)0xffffffffffffffff >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffff << 40) | ((ulong)0xffffffffffffffff >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffff << 56) | ((ulong)0xffffffffffffffff >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0xffffffffffffffef << 8) | ((ulong)0xffffffffffffffef >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffef << 24) | ((ulong)0xffffffffffffffef >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffef << 40) | ((ulong)0xffffffffffffffef >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffef << 56) | ((ulong)0xffffffffffffffef >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0xffffffffffffffdf << 8) | ((ulong)0xffffffffffffffdf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffdf << 24) | ((ulong)0xffffffffffffffdf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffdf << 40) | ((ulong)0xffffffffffffffdf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffdf << 56) | ((ulong)0xffffffffffffffdf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0xffffffffffffffcf << 8) | ((ulong)0xffffffffffffffcf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffcf << 24) | ((ulong)0xffffffffffffffcf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffcf << 40) | ((ulong)0xffffffffffffffcf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffcf << 56) | ((ulong)0xffffffffffffffcf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0xffffffffffffffbf << 8) | ((ulong)0xffffffffffffffbf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffbf << 24) | ((ulong)0xffffffffffffffbf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffbf << 40) | ((ulong)0xffffffffffffffbf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffbf << 56) | ((ulong)0xffffffffffffffbf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0xffffffffffffffaf << 8) | ((ulong)0xffffffffffffffaf >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffffaf << 24) | ((ulong)0xffffffffffffffaf >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffffaf << 40) | ((ulong)0xffffffffffffffaf >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffffaf << 56) | ((ulong)0xffffffffffffffaf >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0xffffffffffffff9f << 8) | ((ulong)0xffffffffffffff9f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff9f << 24) | ((ulong)0xffffffffffffff9f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff9f << 40) | ((ulong)0xffffffffffffff9f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff9f << 56) | ((ulong)0xffffffffffffff9f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0xffffffffffffff8f << 8) | ((ulong)0xffffffffffffff8f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff8f << 24) | ((ulong)0xffffffffffffff8f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff8f << 40) | ((ulong)0xffffffffffffff8f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff8f << 56) | ((ulong)0xffffffffffffff8f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y8 ^= (((((ulong)0xffffffffffffff7f << 8) | ((ulong)0xffffffffffffff7f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff7f << 24) | ((ulong)0xffffffffffffff7f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff7f << 40) | ((ulong)0xffffffffffffff7f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff7f << 56) | ((ulong)0xffffffffffffff7f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y9 ^= (((((ulong)0xffffffffffffff6f << 8) | ((ulong)0xffffffffffffff6f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff6f << 24) | ((ulong)0xffffffffffffff6f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff6f << 40) | ((ulong)0xffffffffffffff6f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff6f << 56) | ((ulong)0xffffffffffffff6f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y10 ^= (((((ulong)0xffffffffffffff5f << 8) | ((ulong)0xffffffffffffff5f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff5f << 24) | ((ulong)0xffffffffffffff5f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff5f << 40) | ((ulong)0xffffffffffffff5f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff5f << 56) | ((ulong)0xffffffffffffff5f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y11 ^= (((((ulong)0xffffffffffffff4f << 8) | ((ulong)0xffffffffffffff4f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff4f << 24) | ((ulong)0xffffffffffffff4f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff4f << 40) | ((ulong)0xffffffffffffff4f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff4f << 56) | ((ulong)0xffffffffffffff4f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y12 ^= (((((ulong)0xffffffffffffff3f << 8) | ((ulong)0xffffffffffffff3f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff3f << 24) | ((ulong)0xffffffffffffff3f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff3f << 40) | ((ulong)0xffffffffffffff3f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff3f << 56) | ((ulong)0xffffffffffffff3f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y13 ^= (((((ulong)0xffffffffffffff2f << 8) | ((ulong)0xffffffffffffff2f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff2f << 24) | ((ulong)0xffffffffffffff2f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff2f << 40) | ((ulong)0xffffffffffffff2f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff2f << 56) | ((ulong)0xffffffffffffff2f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y14 ^= (((((ulong)0xffffffffffffff1f << 8) | ((ulong)0xffffffffffffff1f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff1f << 24) | ((ulong)0xffffffffffffff1f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff1f << 40) | ((ulong)0xffffffffffffff1f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff1f << 56) | ((ulong)0xffffffffffffff1f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));
            y15 ^= (((((ulong)0xffffffffffffff0f << 8) | ((ulong)0xffffffffffffff0f >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xffffffffffffff0f << 24) | ((ulong)0xffffffffffffff0f >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xffffffffffffff0f << 40) | ((ulong)0xffffffffffffff0f >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xffffffffffffff0f << 56) | ((ulong)0xffffffffffffff0f >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 8) | ((ulong)13 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 24) | ((ulong)13 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 40) | ((ulong)13 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56) | ((ulong)13 >> (64 - 56))) & (0xFF000000FF000000)));

            s15 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y10 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
            s14 = s_T[0 * 256 + (byte)(y15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y9 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            s13 = s_T[0 * 256 + (byte)(y14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y8 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y15 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            s12 = s_T[0 * 256 + (byte)(y13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y15 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y14 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            s11 = s_T[0 * 256 + (byte)(y12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y14 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y13 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            s10 = s_T[0 * 256 + (byte)(y11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y13 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y12 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            s9 = s_T[0 * 256 + (byte)(y10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y12 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y11 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y15 >> (8 * 7))];
            s8 = s_T[0 * 256 + (byte)(y9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y11 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y10 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y14 >> (8 * 7))];
            s7 = s_T[0 * 256 + (byte)(y8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y10 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y9 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y13 >> (8 * 7))];
            s6 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y9 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y8 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y12 >> (8 * 7))];
            s5 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y8 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y11 >> (8 * 7))];
            s4 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y15 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y10 >> (8 * 7))];
            s3 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y14 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y9 >> (8 * 7))];
            s2 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y13 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y8 >> (8 * 7))];
            s1 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y12 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            s0 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y11 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];

            w0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w8 ^= (((((ulong)0x8000000000000000 << 8) | ((ulong)0x8000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x8000000000000000 << 24) | ((ulong)0x8000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x8000000000000000 << 40) | ((ulong)0x8000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x8000000000000000 << 56) | ((ulong)0x8000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w9 ^= (((((ulong)0x9000000000000000 << 8) | ((ulong)0x9000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x9000000000000000 << 24) | ((ulong)0x9000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x9000000000000000 << 40) | ((ulong)0x9000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x9000000000000000 << 56) | ((ulong)0x9000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w10 ^= (((((ulong)0xa000000000000000 << 8) | ((ulong)0xa000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xa000000000000000 << 24) | ((ulong)0xa000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xa000000000000000 << 40) | ((ulong)0xa000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xa000000000000000 << 56) | ((ulong)0xa000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w11 ^= (((((ulong)0xb000000000000000 << 8) | ((ulong)0xb000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xb000000000000000 << 24) | ((ulong)0xb000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xb000000000000000 << 40) | ((ulong)0xb000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xb000000000000000 << 56) | ((ulong)0xb000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w12 ^= (((((ulong)0xc000000000000000 << 8) | ((ulong)0xc000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xc000000000000000 << 24) | ((ulong)0xc000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xc000000000000000 << 40) | ((ulong)0xc000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xc000000000000000 << 56) | ((ulong)0xc000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w13 ^= (((((ulong)0xd000000000000000 << 8) | ((ulong)0xd000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xd000000000000000 << 24) | ((ulong)0xd000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xd000000000000000 << 40) | ((ulong)0xd000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xd000000000000000 << 56) | ((ulong)0xd000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w14 ^= (((((ulong)0xe000000000000000 << 8) | ((ulong)0xe000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xe000000000000000 << 24) | ((ulong)0xe000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xe000000000000000 << 40) | ((ulong)0xe000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xe000000000000000 << 56) | ((ulong)0xe000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w15 ^= (((((ulong)0xf000000000000000 << 8) | ((ulong)0xf000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xf000000000000000 << 24) | ((ulong)0xf000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xf000000000000000 << 40) | ((ulong)0xf000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xf000000000000000 << 56) | ((ulong)0xf000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;

            z15 = s_T[0 * 256 + (byte)(w15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w10 >> (8 * 7))];
            z14 = s_T[0 * 256 + (byte)(w14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w15 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w9 >> (8 * 7))];
            z13 = s_T[0 * 256 + (byte)(w13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w14 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w8 >> (8 * 7))];
            z12 = s_T[0 * 256 + (byte)(w12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w13 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w15 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w7 >> (8 * 7))];
            z11 = s_T[0 * 256 + (byte)(w11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w12 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w14 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w6 >> (8 * 7))];
            z10 = s_T[0 * 256 + (byte)(w10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w11 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w13 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w15 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w5 >> (8 * 7))];
            z9 = s_T[0 * 256 + (byte)(w9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w10 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w12 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w14 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w4 >> (8 * 7))];
            z8 = s_T[0 * 256 + (byte)(w8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w9 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w11 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w13 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w3 >> (8 * 7))];
            z7 = s_T[0 * 256 + (byte)(w7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w8 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w10 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w12 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w2 >> (8 * 7))];
            z6 = s_T[0 * 256 + (byte)(w6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w9 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w11 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w1 >> (8 * 7))];
            z5 = s_T[0 * 256 + (byte)(w5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w8 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w10 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w0 >> (8 * 7))];
            z4 = s_T[0 * 256 + (byte)(w4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w9 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w15 >> (8 * 7))];
            z3 = s_T[0 * 256 + (byte)(w3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w8 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w14 >> (8 * 7))];
            z2 = s_T[0 * 256 + (byte)(w2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w13 >> (8 * 7))];
            z1 = s_T[0 * 256 + (byte)(w1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w12 >> (8 * 7))];
            z0 = s_T[0 * 256 + (byte)(w0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w11 >> (8 * 7))];

            for (int i = 1; i < 13; i += 2)
            {
                z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z8 ^= (((((ulong)0x8000000000000000 << 8) | ((ulong)0x8000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x8000000000000000 << 24) | ((ulong)0x8000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x8000000000000000 << 40) | ((ulong)0x8000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x8000000000000000 << 56) | ((ulong)0x8000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z9 ^= (((((ulong)0x9000000000000000 << 8) | ((ulong)0x9000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x9000000000000000 << 24) | ((ulong)0x9000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x9000000000000000 << 40) | ((ulong)0x9000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x9000000000000000 << 56) | ((ulong)0x9000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z10 ^= (((((ulong)0xa000000000000000 << 8) | ((ulong)0xa000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xa000000000000000 << 24) | ((ulong)0xa000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xa000000000000000 << 40) | ((ulong)0xa000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xa000000000000000 << 56) | ((ulong)0xa000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z11 ^= (((((ulong)0xb000000000000000 << 8) | ((ulong)0xb000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xb000000000000000 << 24) | ((ulong)0xb000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xb000000000000000 << 40) | ((ulong)0xb000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xb000000000000000 << 56) | ((ulong)0xb000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z12 ^= (((((ulong)0xc000000000000000 << 8) | ((ulong)0xc000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xc000000000000000 << 24) | ((ulong)0xc000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xc000000000000000 << 40) | ((ulong)0xc000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xc000000000000000 << 56) | ((ulong)0xc000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z13 ^= (((((ulong)0xd000000000000000 << 8) | ((ulong)0xd000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xd000000000000000 << 24) | ((ulong)0xd000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xd000000000000000 << 40) | ((ulong)0xd000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xd000000000000000 << 56) | ((ulong)0xd000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z14 ^= (((((ulong)0xe000000000000000 << 8) | ((ulong)0xe000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xe000000000000000 << 24) | ((ulong)0xe000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xe000000000000000 << 40) | ((ulong)0xe000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xe000000000000000 << 56) | ((ulong)0xe000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z15 ^= (((((ulong)0xf000000000000000 << 8) | ((ulong)0xf000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xf000000000000000 << 24) | ((ulong)0xf000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xf000000000000000 << 40) | ((ulong)0xf000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xf000000000000000 << 56) | ((ulong)0xf000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)i << 56 << 8) | ((ulong)i << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)i << 56 << 24) | ((ulong)i << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)i << 56 << 40) | ((ulong)i << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)i << 56 << 56) | ((ulong)i << 56 >> (64 - 56))) & (0xFF000000FF000000)));

                y15 = s_T[0 * 256 + (byte)(z15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z10 >> (8 * 7))];
                y14 = s_T[0 * 256 + (byte)(z14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z15 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z9 >> (8 * 7))];
                y13 = s_T[0 * 256 + (byte)(z13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z14 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z8 >> (8 * 7))];
                y12 = s_T[0 * 256 + (byte)(z12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z13 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z15 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
                y11 = s_T[0 * 256 + (byte)(z11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z12 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z14 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];
                y10 = s_T[0 * 256 + (byte)(z10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z11 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z13 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z15 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
                y9 = s_T[0 * 256 + (byte)(z9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z10 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z12 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z14 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
                y8 = s_T[0 * 256 + (byte)(z8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z9 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z11 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z13 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
                y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z8 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z10 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z12 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
                y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z9 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z11 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
                y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z8 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z10 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
                y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z9 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z15 >> (8 * 7))];
                y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z8 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z14 >> (8 * 7))];
                y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z13 >> (8 * 7))];
                y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z12 >> (8 * 7))];
                y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z11 >> (8 * 7))];

                y0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y8 ^= (((((ulong)0x8000000000000000 << 8) | ((ulong)0x8000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x8000000000000000 << 24) | ((ulong)0x8000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x8000000000000000 << 40) | ((ulong)0x8000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x8000000000000000 << 56) | ((ulong)0x8000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y9 ^= (((((ulong)0x9000000000000000 << 8) | ((ulong)0x9000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x9000000000000000 << 24) | ((ulong)0x9000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x9000000000000000 << 40) | ((ulong)0x9000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x9000000000000000 << 56) | ((ulong)0x9000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y10 ^= (((((ulong)0xa000000000000000 << 8) | ((ulong)0xa000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xa000000000000000 << 24) | ((ulong)0xa000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xa000000000000000 << 40) | ((ulong)0xa000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xa000000000000000 << 56) | ((ulong)0xa000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y11 ^= (((((ulong)0xb000000000000000 << 8) | ((ulong)0xb000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xb000000000000000 << 24) | ((ulong)0xb000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xb000000000000000 << 40) | ((ulong)0xb000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xb000000000000000 << 56) | ((ulong)0xb000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y12 ^= (((((ulong)0xc000000000000000 << 8) | ((ulong)0xc000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xc000000000000000 << 24) | ((ulong)0xc000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xc000000000000000 << 40) | ((ulong)0xc000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xc000000000000000 << 56) | ((ulong)0xc000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y13 ^= (((((ulong)0xd000000000000000 << 8) | ((ulong)0xd000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xd000000000000000 << 24) | ((ulong)0xd000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xd000000000000000 << 40) | ((ulong)0xd000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xd000000000000000 << 56) | ((ulong)0xd000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y14 ^= (((((ulong)0xe000000000000000 << 8) | ((ulong)0xe000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xe000000000000000 << 24) | ((ulong)0xe000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xe000000000000000 << 40) | ((ulong)0xe000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xe000000000000000 << 56) | ((ulong)0xe000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y15 ^= (((((ulong)0xf000000000000000 << 8) | ((ulong)0xf000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xf000000000000000 << 24) | ((ulong)0xf000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xf000000000000000 << 40) | ((ulong)0xf000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xf000000000000000 << 56) | ((ulong)0xf000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(i + 1) << 56 << 8) | ((ulong)(i + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(i + 1) << 56 << 24) | ((ulong)(i + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(i + 1) << 56 << 40) | ((ulong)(i + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(i + 1) << 56 << 56) | ((ulong)(i + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));

                z15 = s_T[0 * 256 + (byte)(y15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y10 >> (8 * 7))];
                z14 = s_T[0 * 256 + (byte)(y14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y15 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y9 >> (8 * 7))];
                z13 = s_T[0 * 256 + (byte)(y13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y14 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y8 >> (8 * 7))];
                z12 = s_T[0 * 256 + (byte)(y12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y13 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y15 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
                z11 = s_T[0 * 256 + (byte)(y11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y12 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y14 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];
                z10 = s_T[0 * 256 + (byte)(y10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y11 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y13 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y15 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
                z9 = s_T[0 * 256 + (byte)(y9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y10 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y12 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y14 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
                z8 = s_T[0 * 256 + (byte)(y8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y9 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y11 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y13 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
                z7 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y8 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y10 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y12 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
                z6 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y9 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y11 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
                z5 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y8 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y10 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
                z4 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y9 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y15 >> (8 * 7))];
                z3 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y8 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y14 >> (8 * 7))];
                z2 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y13 >> (8 * 7))];
                z1 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y12 >> (8 * 7))];
                z0 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y11 >> (8 * 7))];;
            }

            z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z8 ^= (((((ulong)0x8000000000000000 << 8) | ((ulong)0x8000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x8000000000000000 << 24) | ((ulong)0x8000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x8000000000000000 << 40) | ((ulong)0x8000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x8000000000000000 << 56) | ((ulong)0x8000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z9 ^= (((((ulong)0x9000000000000000 << 8) | ((ulong)0x9000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x9000000000000000 << 24) | ((ulong)0x9000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x9000000000000000 << 40) | ((ulong)0x9000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x9000000000000000 << 56) | ((ulong)0x9000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z10 ^= (((((ulong)0xa000000000000000 << 8) | ((ulong)0xa000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xa000000000000000 << 24) | ((ulong)0xa000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xa000000000000000 << 40) | ((ulong)0xa000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xa000000000000000 << 56) | ((ulong)0xa000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z11 ^= (((((ulong)0xb000000000000000 << 8) | ((ulong)0xb000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xb000000000000000 << 24) | ((ulong)0xb000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xb000000000000000 << 40) | ((ulong)0xb000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xb000000000000000 << 56) | ((ulong)0xb000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z12 ^= (((((ulong)0xc000000000000000 << 8) | ((ulong)0xc000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xc000000000000000 << 24) | ((ulong)0xc000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xc000000000000000 << 40) | ((ulong)0xc000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xc000000000000000 << 56) | ((ulong)0xc000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z13 ^= (((((ulong)0xd000000000000000 << 8) | ((ulong)0xd000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xd000000000000000 << 24) | ((ulong)0xd000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xd000000000000000 << 40) | ((ulong)0xd000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xd000000000000000 << 56) | ((ulong)0xd000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z14 ^= (((((ulong)0xe000000000000000 << 8) | ((ulong)0xe000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xe000000000000000 << 24) | ((ulong)0xe000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xe000000000000000 << 40) | ((ulong)0xe000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xe000000000000000 << 56) | ((ulong)0xe000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            z15 ^= (((((ulong)0xf000000000000000 << 8) | ((ulong)0xf000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xf000000000000000 << 24) | ((ulong)0xf000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xf000000000000000 << 40) | ((ulong)0xf000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xf000000000000000 << 56) | ((ulong)0xf000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));

            y15 = s_T[0 * 256 + (byte)(z15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z10 >> (8 * 7))];
            y14 = s_T[0 * 256 + (byte)(z14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z15 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z9 >> (8 * 7))];
            y13 = s_T[0 * 256 + (byte)(z13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z14 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z8 >> (8 * 7))];
            y12 = s_T[0 * 256 + (byte)(z12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z13 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z15 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
            y11 = s_T[0 * 256 + (byte)(z11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z12 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z14 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];
            y10 = s_T[0 * 256 + (byte)(z10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z11 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z13 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z15 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
            y9 = s_T[0 * 256 + (byte)(z9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z10 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z12 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z14 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
            y8 = s_T[0 * 256 + (byte)(z8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z9 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z11 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z13 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z8 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z10 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z12 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z9 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z11 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z8 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z10 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z9 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z15 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z8 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z14 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z13 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z12 >> (8 * 7))];
            y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z11 >> (8 * 7))];

            m_state[0] ^= s0 ^ y0;
            m_state[1] ^= s1 ^ y1;
            m_state[2] ^= s2 ^ y2;
            m_state[3] ^= s3 ^ y3;
            m_state[4] ^= s4 ^ y4;
            m_state[5] ^= s5 ^ y5;
            m_state[6] ^= s6 ^ y6;
            m_state[7] ^= s7 ^ y7;
            m_state[8] ^= s8 ^ y8;
            m_state[9] ^= s9 ^ y9;
            m_state[10] ^= s10 ^ y10;
            m_state[11] ^= s11 ^ y11;
            m_state[12] ^= s12 ^ y12;
            m_state[13] ^= s13 ^ y13;
            m_state[14] ^= s14 ^ y14;
            m_state[15] ^= s15 ^ y15;
        }

        protected override void OutputTransformation()
        {
            ulong w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
            ulong y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14, y15;
            ulong z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11, z12, z13, z14, z15;

            w0 = m_state[0];
            w1 = m_state[1];
            w2 = m_state[2];
            w3 = m_state[3];
            w4 = m_state[4];
            w5 = m_state[5];
            w6 = m_state[6];
            w7 = m_state[7];
            w8 = m_state[8];
            w9 = m_state[9];
            w10 = m_state[10];
            w11 = m_state[11];
            w12 = m_state[12];
            w13 = m_state[13];
            w14 = m_state[14];
            w15 = m_state[15];

            w0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w8 ^= (((((ulong)0x8000000000000000 << 8) | ((ulong)0x8000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x8000000000000000 << 24) | ((ulong)0x8000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x8000000000000000 << 40) | ((ulong)0x8000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x8000000000000000 << 56) | ((ulong)0x8000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w9 ^= (((((ulong)0x9000000000000000 << 8) | ((ulong)0x9000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x9000000000000000 << 24) | ((ulong)0x9000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x9000000000000000 << 40) | ((ulong)0x9000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x9000000000000000 << 56) | ((ulong)0x9000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w10 ^= (((((ulong)0xa000000000000000 << 8) | ((ulong)0xa000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xa000000000000000 << 24) | ((ulong)0xa000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xa000000000000000 << 40) | ((ulong)0xa000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xa000000000000000 << 56) | ((ulong)0xa000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w11 ^= (((((ulong)0xb000000000000000 << 8) | ((ulong)0xb000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xb000000000000000 << 24) | ((ulong)0xb000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xb000000000000000 << 40) | ((ulong)0xb000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xb000000000000000 << 56) | ((ulong)0xb000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w12 ^= (((((ulong)0xc000000000000000 << 8) | ((ulong)0xc000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xc000000000000000 << 24) | ((ulong)0xc000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xc000000000000000 << 40) | ((ulong)0xc000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xc000000000000000 << 56) | ((ulong)0xc000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w13 ^= (((((ulong)0xd000000000000000 << 8) | ((ulong)0xd000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xd000000000000000 << 24) | ((ulong)0xd000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xd000000000000000 << 40) | ((ulong)0xd000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xd000000000000000 << 56) | ((ulong)0xd000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w14 ^= (((((ulong)0xe000000000000000 << 8) | ((ulong)0xe000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xe000000000000000 << 24) | ((ulong)0xe000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xe000000000000000 << 40) | ((ulong)0xe000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xe000000000000000 << 56) | ((ulong)0xe000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;
            w15 ^= (((((ulong)0xf000000000000000 << 8) | ((ulong)0xf000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xf000000000000000 << 24) | ((ulong)0xf000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xf000000000000000 << 40) | ((ulong)0xf000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xf000000000000000 << 56) | ((ulong)0xf000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^ 0;

            y15 = s_T[0 * 256 + (byte)(w15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w10 >> (8 * 7))];
            y14 = s_T[0 * 256 + (byte)(w14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w15 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w9 >> (8 * 7))];
            y13 = s_T[0 * 256 + (byte)(w13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w14 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w8 >> (8 * 7))];
            y12 = s_T[0 * 256 + (byte)(w12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w13 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w15 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w7 >> (8 * 7))];
            y11 = s_T[0 * 256 + (byte)(w11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w12 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w14 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w6 >> (8 * 7))];
            y10 = s_T[0 * 256 + (byte)(w10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w11 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w13 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w15 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w5 >> (8 * 7))];
            y9 = s_T[0 * 256 + (byte)(w9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w10 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w12 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w14 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w4 >> (8 * 7))];
            y8 = s_T[0 * 256 + (byte)(w8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w9 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w11 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w13 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w3 >> (8 * 7))];
            y7 = s_T[0 * 256 + (byte)(w7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w8 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w10 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w12 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w2 >> (8 * 7))];
            y6 = s_T[0 * 256 + (byte)(w6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w9 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w11 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w1 >> (8 * 7))];
            y5 = s_T[0 * 256 + (byte)(w5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w8 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w10 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w0 >> (8 * 7))];
            y4 = s_T[0 * 256 + (byte)(w4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w9 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w15 >> (8 * 7))];
            y3 = s_T[0 * 256 + (byte)(w3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w8 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w14 >> (8 * 7))];
            y2 = s_T[0 * 256 + (byte)(w2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w13 >> (8 * 7))];
            y1 = s_T[0 * 256 + (byte)(w1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w12 >> (8 * 7))];
            y0 = s_T[0 * 256 + (byte)(w0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(w1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(w2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(w3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(w4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(w5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(w6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(w11 >> (8 * 7))];

            for (int j = 1; j < 13; j += 2)
            {
                y0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y8 ^= (((((ulong)0x8000000000000000 << 8) | ((ulong)0x8000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x8000000000000000 << 24) | ((ulong)0x8000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x8000000000000000 << 40) | ((ulong)0x8000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x8000000000000000 << 56) | ((ulong)0x8000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y9 ^= (((((ulong)0x9000000000000000 << 8) | ((ulong)0x9000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x9000000000000000 << 24) | ((ulong)0x9000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x9000000000000000 << 40) | ((ulong)0x9000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x9000000000000000 << 56) | ((ulong)0x9000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y10 ^= (((((ulong)0xa000000000000000 << 8) | ((ulong)0xa000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xa000000000000000 << 24) | ((ulong)0xa000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xa000000000000000 << 40) | ((ulong)0xa000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xa000000000000000 << 56) | ((ulong)0xa000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y11 ^= (((((ulong)0xb000000000000000 << 8) | ((ulong)0xb000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xb000000000000000 << 24) | ((ulong)0xb000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xb000000000000000 << 40) | ((ulong)0xb000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xb000000000000000 << 56) | ((ulong)0xb000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y12 ^= (((((ulong)0xc000000000000000 << 8) | ((ulong)0xc000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xc000000000000000 << 24) | ((ulong)0xc000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xc000000000000000 << 40) | ((ulong)0xc000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xc000000000000000 << 56) | ((ulong)0xc000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y13 ^= (((((ulong)0xd000000000000000 << 8) | ((ulong)0xd000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xd000000000000000 << 24) | ((ulong)0xd000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xd000000000000000 << 40) | ((ulong)0xd000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xd000000000000000 << 56) | ((ulong)0xd000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y14 ^= (((((ulong)0xe000000000000000 << 8) | ((ulong)0xe000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xe000000000000000 << 24) | ((ulong)0xe000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xe000000000000000 << 40) | ((ulong)0xe000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xe000000000000000 << 56) | ((ulong)0xe000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                y15 ^= (((((ulong)0xf000000000000000 << 8) | ((ulong)0xf000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xf000000000000000 << 24) | ((ulong)0xf000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xf000000000000000 << 40) | ((ulong)0xf000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xf000000000000000 << 56) | ((ulong)0xf000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)j << 56 << 8) | ((ulong)j << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)j << 56 << 24) | ((ulong)j << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)j << 56 << 40) | ((ulong)j << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)j << 56 << 56) | ((ulong)j << 56 >> (64 - 56))) & (0xFF000000FF000000)));

                z15 = s_T[0 * 256 + (byte)(y15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y10 >> (8 * 7))];
                z14 = s_T[0 * 256 + (byte)(y14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y15 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y9 >> (8 * 7))];
                z13 = s_T[0 * 256 + (byte)(y13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y14 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y8 >> (8 * 7))];
                z12 = s_T[0 * 256 + (byte)(y12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y13 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y15 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
                z11 = s_T[0 * 256 + (byte)(y11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y12 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y14 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];
                z10 = s_T[0 * 256 + (byte)(y10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y11 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y13 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y15 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
                z9 = s_T[0 * 256 + (byte)(y9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y10 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y12 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y14 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
                z8 = s_T[0 * 256 + (byte)(y8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y9 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y11 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y13 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
                z7 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y8 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y10 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y12 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
                z6 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y9 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y11 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
                z5 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y8 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y10 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
                z4 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y9 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y15 >> (8 * 7))];
                z3 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y8 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y14 >> (8 * 7))];
                z2 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y13 >> (8 * 7))];
                z1 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y12 >> (8 * 7))];
                z0 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y11 >> (8 * 7))];

                z0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z8 ^= (((((ulong)0x8000000000000000 << 8) | ((ulong)0x8000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x8000000000000000 << 24) | ((ulong)0x8000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x8000000000000000 << 40) | ((ulong)0x8000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x8000000000000000 << 56) | ((ulong)0x8000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z9 ^= (((((ulong)0x9000000000000000 << 8) | ((ulong)0x9000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0x9000000000000000 << 24) | ((ulong)0x9000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0x9000000000000000 << 40) | ((ulong)0x9000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0x9000000000000000 << 56) | ((ulong)0x9000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z10 ^= (((((ulong)0xa000000000000000 << 8) | ((ulong)0xa000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xa000000000000000 << 24) | ((ulong)0xa000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xa000000000000000 << 40) | ((ulong)0xa000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xa000000000000000 << 56) | ((ulong)0xa000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z11 ^= (((((ulong)0xb000000000000000 << 8) | ((ulong)0xb000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xb000000000000000 << 24) | ((ulong)0xb000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xb000000000000000 << 40) | ((ulong)0xb000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xb000000000000000 << 56) | ((ulong)0xb000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z12 ^= (((((ulong)0xc000000000000000 << 8) | ((ulong)0xc000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xc000000000000000 << 24) | ((ulong)0xc000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xc000000000000000 << 40) | ((ulong)0xc000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xc000000000000000 << 56) | ((ulong)0xc000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z13 ^= (((((ulong)0xd000000000000000 << 8) | ((ulong)0xd000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xd000000000000000 << 24) | ((ulong)0xd000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xd000000000000000 << 40) | ((ulong)0xd000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xd000000000000000 << 56) | ((ulong)0xd000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z14 ^= (((((ulong)0xe000000000000000 << 8) | ((ulong)0xe000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xe000000000000000 << 24) | ((ulong)0xe000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xe000000000000000 << 40) | ((ulong)0xe000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xe000000000000000 << 56) | ((ulong)0xe000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));
                z15 ^= (((((ulong)0xf000000000000000 << 8) | ((ulong)0xf000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)0xf000000000000000 << 24) | ((ulong)0xf000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)0xf000000000000000 << 40) | ((ulong)0xf000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)0xf000000000000000 << 56) | ((ulong)0xf000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                    (((((ulong)(j + 1) << 56 << 8) | ((ulong)(j + 1) << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                    ((((ulong)(j + 1) << 56 << 24) | ((ulong)(j + 1) << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                    ((((ulong)(j + 1) << 56 << 40) | ((ulong)(j + 1) << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                    ((((ulong)(j + 1) << 56 << 56) | ((ulong)(j + 1) << 56 >> (64 - 56))) & (0xFF000000FF000000)));

                y15 = s_T[0 * 256 + (byte)(z15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z0 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z2 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z4 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z10 >> (8 * 7))];
                y14 = s_T[0 * 256 + (byte)(z14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z15 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z1 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z3 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z9 >> (8 * 7))];
                y13 = s_T[0 * 256 + (byte)(z13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z14 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z0 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z2 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z8 >> (8 * 7))];
                y12 = s_T[0 * 256 + (byte)(z12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z13 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z15 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z1 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z7 >> (8 * 7))];
                y11 = s_T[0 * 256 + (byte)(z11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z12 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z14 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z0 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z6 >> (8 * 7))];
                y10 = s_T[0 * 256 + (byte)(z10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z11 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z13 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z15 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z5 >> (8 * 7))];
                y9 = s_T[0 * 256 + (byte)(z9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z10 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z12 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z14 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z4 >> (8 * 7))];
                y8 = s_T[0 * 256 + (byte)(z8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z9 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z11 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z13 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z3 >> (8 * 7))];
                y7 = s_T[0 * 256 + (byte)(z7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z8 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z10 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z12 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z2 >> (8 * 7))];
                y6 = s_T[0 * 256 + (byte)(z6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z7 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z9 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z11 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z1 >> (8 * 7))];
                y5 = s_T[0 * 256 + (byte)(z5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z6 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z8 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z10 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z0 >> (8 * 7))];
                y4 = s_T[0 * 256 + (byte)(z4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z5 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z7 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z9 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z15 >> (8 * 7))];
                y3 = s_T[0 * 256 + (byte)(z3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z4 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z6 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z8 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z14 >> (8 * 7))];
                y2 = s_T[0 * 256 + (byte)(z2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z3 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z5 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z7 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z13 >> (8 * 7))];
                y1 = s_T[0 * 256 + (byte)(z1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z2 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z4 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z6 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z12 >> (8 * 7))];
                y0 = s_T[0 * 256 + (byte)(z0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(z1 >> (8 * 1))] ^
                    s_T[2 * 256 + (byte)(z2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(z3 >> (8 * 3))] ^
                    s_T[4 * 256 + (byte)(z4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(z5 >> (8 * 5))] ^
                    s_T[6 * 256 + (byte)(z6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(z11 >> (8 * 7))];
            }

            y0 ^= (((((ulong)0x0000000000000000 << 8) | ((ulong)0x0000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x0000000000000000 << 24) | ((ulong)0x0000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x0000000000000000 << 40) | ((ulong)0x0000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x0000000000000000 << 56) | ((ulong)0x0000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y1 ^= (((((ulong)0x1000000000000000 << 8) | ((ulong)0x1000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x1000000000000000 << 24) | ((ulong)0x1000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x1000000000000000 << 40) | ((ulong)0x1000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x1000000000000000 << 56) | ((ulong)0x1000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y2 ^= (((((ulong)0x2000000000000000 << 8) | ((ulong)0x2000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x2000000000000000 << 24) | ((ulong)0x2000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x2000000000000000 << 40) | ((ulong)0x2000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x2000000000000000 << 56) | ((ulong)0x2000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y3 ^= (((((ulong)0x3000000000000000 << 8) | ((ulong)0x3000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x3000000000000000 << 24) | ((ulong)0x3000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x3000000000000000 << 40) | ((ulong)0x3000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x3000000000000000 << 56) | ((ulong)0x3000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y4 ^= (((((ulong)0x4000000000000000 << 8) | ((ulong)0x4000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x4000000000000000 << 24) | ((ulong)0x4000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x4000000000000000 << 40) | ((ulong)0x4000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x4000000000000000 << 56) | ((ulong)0x4000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y5 ^= (((((ulong)0x5000000000000000 << 8) | ((ulong)0x5000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x5000000000000000 << 24) | ((ulong)0x5000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x5000000000000000 << 40) | ((ulong)0x5000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x5000000000000000 << 56) | ((ulong)0x5000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y6 ^= (((((ulong)0x6000000000000000 << 8) | ((ulong)0x6000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x6000000000000000 << 24) | ((ulong)0x6000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x6000000000000000 << 40) | ((ulong)0x6000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x6000000000000000 << 56) | ((ulong)0x6000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y7 ^= (((((ulong)0x7000000000000000 << 8) | ((ulong)0x7000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x7000000000000000 << 24) | ((ulong)0x7000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x7000000000000000 << 40) | ((ulong)0x7000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x7000000000000000 << 56) | ((ulong)0x7000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y8 ^= (((((ulong)0x8000000000000000 << 8) | ((ulong)0x8000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x8000000000000000 << 24) | ((ulong)0x8000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x8000000000000000 << 40) | ((ulong)0x8000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x8000000000000000 << 56) | ((ulong)0x8000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y9 ^= (((((ulong)0x9000000000000000 << 8) | ((ulong)0x9000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0x9000000000000000 << 24) | ((ulong)0x9000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0x9000000000000000 << 40) | ((ulong)0x9000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0x9000000000000000 << 56) | ((ulong)0x9000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y10 ^= (((((ulong)0xa000000000000000 << 8) | ((ulong)0xa000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xa000000000000000 << 24) | ((ulong)0xa000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xa000000000000000 << 40) | ((ulong)0xa000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xa000000000000000 << 56) | ((ulong)0xa000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y11 ^= (((((ulong)0xb000000000000000 << 8) | ((ulong)0xb000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xb000000000000000 << 24) | ((ulong)0xb000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xb000000000000000 << 40) | ((ulong)0xb000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xb000000000000000 << 56) | ((ulong)0xb000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y12 ^= (((((ulong)0xc000000000000000 << 8) | ((ulong)0xc000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xc000000000000000 << 24) | ((ulong)0xc000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xc000000000000000 << 40) | ((ulong)0xc000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xc000000000000000 << 56) | ((ulong)0xc000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y13 ^= (((((ulong)0xd000000000000000 << 8) | ((ulong)0xd000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xd000000000000000 << 24) | ((ulong)0xd000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xd000000000000000 << 40) | ((ulong)0xd000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xd000000000000000 << 56) | ((ulong)0xd000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y14 ^= (((((ulong)0xe000000000000000 << 8) | ((ulong)0xe000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xe000000000000000 << 24) | ((ulong)0xe000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xe000000000000000 << 40) | ((ulong)0xe000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xe000000000000000 << 56) | ((ulong)0xe000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));
            y15 ^= (((((ulong)0xf000000000000000 << 8) | ((ulong)0xf000000000000000 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)0xf000000000000000 << 24) | ((ulong)0xf000000000000000 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)0xf000000000000000 << 40) | ((ulong)0xf000000000000000 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)0xf000000000000000 << 56) | ((ulong)0xf000000000000000 >> (64 - 56))) & (0xFF000000FF000000))) ^
                (((((ulong)13 << 56 << 8) | ((ulong)13 << 56 >> (64 - 8))) & (0x000000FF000000FF)) |
                ((((ulong)13 << 56 << 24) | ((ulong)13 << 56 >> (64 - 24))) & (0x0000FF000000FF00)) |
                ((((ulong)13 << 56 << 40) | ((ulong)13 << 56 >> (64 - 40))) & (0x00FF000000FF0000)) |
                ((((ulong)13 << 56 << 56) | ((ulong)13 << 56 >> (64 - 56))) & (0xFF000000FF000000)));

            w15 = s_T[0 * 256 + (byte)(y15 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y0 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y1 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y2 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y3 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y4 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y5 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y10 >> (8 * 7))];
            w14 = s_T[0 * 256 + (byte)(y14 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y15 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y0 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y1 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y2 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y3 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y4 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y9 >> (8 * 7))];
            w13 = s_T[0 * 256 + (byte)(y13 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y14 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y15 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y0 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y1 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y2 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y3 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y8 >> (8 * 7))];
            w12 = s_T[0 * 256 + (byte)(y12 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y13 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y14 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y15 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y0 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y1 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y2 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y7 >> (8 * 7))];
            w11 = s_T[0 * 256 + (byte)(y11 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y12 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y13 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y14 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y15 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y0 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y1 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y6 >> (8 * 7))];
            w10 = s_T[0 * 256 + (byte)(y10 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y11 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y12 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y13 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y14 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y15 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y0 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y5 >> (8 * 7))];
            w9 = s_T[0 * 256 + (byte)(y9 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y10 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y11 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y12 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y13 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y14 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y15 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y4 >> (8 * 7))];
            w8 = s_T[0 * 256 + (byte)(y8 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y9 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y10 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y11 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y12 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y13 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y14 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y3 >> (8 * 7))];
            w7 = s_T[0 * 256 + (byte)(y7 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y8 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y9 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y10 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y11 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y12 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y13 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y2 >> (8 * 7))];
            w6 = s_T[0 * 256 + (byte)(y6 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y7 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y8 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y9 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y10 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y11 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y12 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y1 >> (8 * 7))];
            w5 = s_T[0 * 256 + (byte)(y5 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y6 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y7 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y8 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y9 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y10 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y11 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y0 >> (8 * 7))];
            w4 = s_T[0 * 256 + (byte)(y4 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y5 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y6 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y7 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y8 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y9 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y10 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y15 >> (8 * 7))];
            w3 = s_T[0 * 256 + (byte)(y3 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y4 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y5 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y6 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y7 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y8 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y9 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y14 >> (8 * 7))];
            w2 = s_T[0 * 256 + (byte)(y2 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y3 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y4 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y5 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y6 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y7 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y8 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y13 >> (8 * 7))];
            w1 = s_T[0 * 256 + (byte)(y1 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y2 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y3 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y4 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y5 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y6 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y7 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y12 >> (8 * 7))];
            w0 = s_T[0 * 256 + (byte)(y0 >> (8 * 0))] ^ s_T[1 * 256 + (byte)(y1 >> (8 * 1))] ^
                s_T[2 * 256 + (byte)(y2 >> (8 * 2))] ^ s_T[3 * 256 + (byte)(y3 >> (8 * 3))] ^
                s_T[4 * 256 + (byte)(y4 >> (8 * 4))] ^ s_T[5 * 256 + (byte)(y5 >> (8 * 5))] ^
                s_T[6 * 256 + (byte)(y6 >> (8 * 6))] ^ s_T[7 * 256 + (byte)(y11 >> (8 * 7))];

            m_state[0] ^= w0;
            m_state[1] ^= w1;
            m_state[2] ^= w2;
            m_state[3] ^= w3;
            m_state[4] ^= w4;
            m_state[5] ^= w5;
            m_state[6] ^= w6;
            m_state[7] ^= w7;
            m_state[8] ^= w8;
            m_state[9] ^= w9;
            m_state[10] ^= w10;
            m_state[11] ^= w11;
            m_state[12] ^= w12;
            m_state[13] ^= w13;
            m_state[14] ^= w14;
            m_state[15] ^= w15;
        }


    };
}